1/* 2 * linux/fs/ext3/super.c 3 * 4 * Copyright (C) 1992, 1993, 1994, 1995 5 * Remy Card (card@masi.ibp.fr) 6 * Laboratoire MASI - Institut Blaise Pascal 7 * Universite Pierre et Marie Curie (Paris VI) 8 * 9 * from 10 * 11 * linux/fs/minix/inode.c 12 * 13 * Copyright (C) 1991, 1992 Linus Torvalds 14 * 15 * Big-endian to little-endian byte-swapping/bitmaps by 16 * David S. Miller (davem@caip.rutgers.edu), 1995 17 */ 18 19#include <linux/module.h> 20#include <linux/string.h> 21#include <linux/fs.h> 22#include <linux/time.h> 23#include <linux/jbd.h> 24#include <linux/ext3_fs.h> 25#include <linux/ext3_jbd.h> 26#include <linux/slab.h> 27#include <linux/init.h> 28#include <linux/blkdev.h> 29#include <linux/parser.h> 30#include <linux/smp_lock.h> 31#include <linux/buffer_head.h> 32#include <linux/vfs.h> 33#include <linux/random.h> 34#include <linux/mount.h> 35#include <linux/namei.h> 36#include <linux/quotaops.h> 37#include <linux/seq_file.h> 38 39#include <asm/uaccess.h> 40 41#include "xattr.h" 42#include "acl.h" 43#include "namei.h" 44 45static int ext3_load_journal(struct super_block *, struct ext3_super_block *, 46 unsigned long journal_devnum); 47static int ext3_create_journal(struct super_block *, struct ext3_super_block *, 48 unsigned int); 49static void ext3_commit_super (struct super_block * sb, 50 struct ext3_super_block * es, 51 int sync); 52static void ext3_mark_recovery_complete(struct super_block * sb, 53 struct ext3_super_block * es); 54static void ext3_clear_journal_err(struct super_block * sb, 55 struct ext3_super_block * es); 56static int ext3_sync_fs(struct super_block *sb, int wait); 57static const char *ext3_decode_error(struct super_block * sb, int errno, 58 char nbuf[16]); 59static int ext3_remount (struct super_block * sb, int * flags, char * data); 60static int ext3_statfs (struct dentry * dentry, struct kstatfs * buf); 61static void ext3_unlockfs(struct super_block *sb); 62static void ext3_write_super (struct super_block * sb); 63static void 
ext3_write_super_lockfs(struct super_block *sb); 64 65/* 66 * Wrappers for journal_start/end. 67 * 68 * The only special thing we need to do here is to make sure that all 69 * journal_end calls result in the superblock being marked dirty, so 70 * that sync() will call the filesystem's write_super callback if 71 * appropriate. 72 */ 73handle_t *ext3_journal_start_sb(struct super_block *sb, int nblocks) 74{ 75 journal_t *journal; 76 77 if (sb->s_flags & MS_RDONLY) 78 return ERR_PTR(-EROFS); 79 80 /* Special case here: if the journal has aborted behind our 81 * backs (eg. EIO in the commit thread), then we still need to 82 * take the FS itself readonly cleanly. */ 83 journal = EXT3_SB(sb)->s_journal; 84 if (is_journal_aborted(journal)) { 85 ext3_abort(sb, __FUNCTION__, 86 "Detected aborted journal"); 87 return ERR_PTR(-EROFS); 88 } 89 90 return journal_start(journal, nblocks); 91} 92 93/* 94 * The only special thing we need to do here is to make sure that all 95 * journal_stop calls result in the superblock being marked dirty, so 96 * that sync() will call the filesystem's write_super callback if 97 * appropriate. 
98 */ 99int __ext3_journal_stop(const char *where, handle_t *handle) 100{ 101 struct super_block *sb; 102 int err; 103 int rc; 104 105 sb = handle->h_transaction->t_journal->j_private; 106 err = handle->h_err; 107 rc = journal_stop(handle); 108 109 if (!err) 110 err = rc; 111 if (err) 112 __ext3_std_error(sb, where, err); 113 return err; 114} 115 116void ext3_journal_abort_handle(const char *caller, const char *err_fn, 117 struct buffer_head *bh, handle_t *handle, int err) 118{ 119 char nbuf[16]; 120 const char *errstr = ext3_decode_error(NULL, err, nbuf); 121 122 if (bh) 123 BUFFER_TRACE(bh, "abort"); 124 125 if (!handle->h_err) 126 handle->h_err = err; 127 128 if (is_handle_aborted(handle)) 129 return; 130 131 printk(KERN_ERR "%s: aborting transaction: %s in %s\n", 132 caller, errstr, err_fn); 133 134 journal_abort_handle(handle); 135} 136 137/* Deal with the reporting of failure conditions on a filesystem such as 138 * inconsistencies detected or read IO failures. 139 * 140 * On ext2, we can store the error state of the filesystem in the 141 * superblock. That is not possible on ext3, because we may have other 142 * write ordering constraints on the superblock which prevent us from 143 * writing it out straight away; and given that the journal is about to 144 * be aborted, we can't rely on the current, or future, transactions to 145 * write out the superblock safely. 146 * 147 * We'll just use the journal_abort() error code to record an error in 148 * the journal instead. On recovery, the journal will compain about 149 * that error until we've noted it down and cleared it. 
150 */ 151 152static void ext3_handle_error(struct super_block *sb) 153{ 154 struct ext3_super_block *es = EXT3_SB(sb)->s_es; 155 156 EXT3_SB(sb)->s_mount_state |= EXT3_ERROR_FS; 157 es->s_state |= cpu_to_le16(EXT3_ERROR_FS); 158 159 if (sb->s_flags & MS_RDONLY) 160 return; 161 162 if (!test_opt (sb, ERRORS_CONT)) { 163 journal_t *journal = EXT3_SB(sb)->s_journal; 164 165 EXT3_SB(sb)->s_mount_opt |= EXT3_MOUNT_ABORT; 166 if (journal) 167 journal_abort(journal, -EIO); 168 } 169 if (test_opt (sb, ERRORS_RO)) { 170 printk (KERN_CRIT "Remounting filesystem read-only\n"); 171 sb->s_flags |= MS_RDONLY; 172 } 173 ext3_commit_super(sb, es, 1); 174 if (test_opt(sb, ERRORS_PANIC)) 175 panic("EXT3-fs (device %s): panic forced after error\n", 176 sb->s_id); 177} 178 179void ext3_error (struct super_block * sb, const char * function, 180 const char * fmt, ...) 181{ 182 va_list args; 183 184 va_start(args, fmt); 185 printk(KERN_CRIT "EXT3-fs error (device %s): %s: ",sb->s_id, function); 186 vprintk(fmt, args); 187 printk("\n"); 188 va_end(args); 189 190 ext3_handle_error(sb); 191} 192 193static const char *ext3_decode_error(struct super_block * sb, int errno, 194 char nbuf[16]) 195{ 196 char *errstr = NULL; 197 198 switch (errno) { 199 case -EIO: 200 errstr = "IO failure"; 201 break; 202 case -ENOMEM: 203 errstr = "Out of memory"; 204 break; 205 case -EROFS: 206 if (!sb || EXT3_SB(sb)->s_journal->j_flags & JFS_ABORT) 207 errstr = "Journal has aborted"; 208 else 209 errstr = "Readonly filesystem"; 210 break; 211 default: 212 /* If the caller passed in an extra buffer for unknown 213 * errors, textualise them now. Else we just return 214 * NULL. */ 215 if (nbuf) { 216 /* Check for truncated error codes... */ 217 if (snprintf(nbuf, 16, "error %d", -errno) >= 0) 218 errstr = nbuf; 219 } 220 break; 221 } 222 223 return errstr; 224} 225 226/* __ext3_std_error decodes expected errors from journaling functions 227 * automatically and invokes the appropriate error response. 
*/ 228 229void __ext3_std_error (struct super_block * sb, const char * function, 230 int errno) 231{ 232 char nbuf[16]; 233 const char *errstr; 234 235 /* Special case: if the error is EROFS, and we're not already 236 * inside a transaction, then there's really no point in logging 237 * an error. */ 238 if (errno == -EROFS && journal_current_handle() == NULL && 239 (sb->s_flags & MS_RDONLY)) 240 return; 241 242 errstr = ext3_decode_error(sb, errno, nbuf); 243 printk (KERN_CRIT "EXT3-fs error (device %s) in %s: %s\n", 244 sb->s_id, function, errstr); 245 246 ext3_handle_error(sb); 247} 248 249/* 250 * ext3_abort is a much stronger failure handler than ext3_error. The 251 * abort function may be used to deal with unrecoverable failures such 252 * as journal IO errors or ENOMEM at a critical moment in log management. 253 * 254 * We unconditionally force the filesystem into an ABORT|READONLY state, 255 * unless the error response on the fs has been set to panic in which 256 * case we take the easy way out and panic immediately. 257 */ 258 259void ext3_abort (struct super_block * sb, const char * function, 260 const char * fmt, ...) 261{ 262 va_list args; 263 264 printk (KERN_CRIT "ext3_abort called.\n"); 265 266 va_start(args, fmt); 267 printk(KERN_CRIT "EXT3-fs error (device %s): %s: ",sb->s_id, function); 268 vprintk(fmt, args); 269 printk("\n"); 270 va_end(args); 271 272 if (test_opt(sb, ERRORS_PANIC)) 273 panic("EXT3-fs panic from previous error\n"); 274 275 if (sb->s_flags & MS_RDONLY) 276 return; 277 278 printk(KERN_CRIT "Remounting filesystem read-only\n"); 279 EXT3_SB(sb)->s_mount_state |= EXT3_ERROR_FS; 280 sb->s_flags |= MS_RDONLY; 281 EXT3_SB(sb)->s_mount_opt |= EXT3_MOUNT_ABORT; 282 journal_abort(EXT3_SB(sb)->s_journal, -EIO); 283} 284 285void ext3_warning (struct super_block * sb, const char * function, 286 const char * fmt, ...) 
287{ 288 va_list args; 289 290 va_start(args, fmt); 291 printk(KERN_WARNING "EXT3-fs warning (device %s): %s: ", 292 sb->s_id, function); 293 vprintk(fmt, args); 294 printk("\n"); 295 va_end(args); 296} 297 298void ext3_update_dynamic_rev(struct super_block *sb) 299{ 300 struct ext3_super_block *es = EXT3_SB(sb)->s_es; 301 302 if (le32_to_cpu(es->s_rev_level) > EXT3_GOOD_OLD_REV) 303 return; 304 305 ext3_warning(sb, __FUNCTION__, 306 "updating to rev %d because of new feature flag, " 307 "running e2fsck is recommended", 308 EXT3_DYNAMIC_REV); 309 310 es->s_first_ino = cpu_to_le32(EXT3_GOOD_OLD_FIRST_INO); 311 es->s_inode_size = cpu_to_le16(EXT3_GOOD_OLD_INODE_SIZE); 312 es->s_rev_level = cpu_to_le32(EXT3_DYNAMIC_REV); 313 /* leave es->s_feature_*compat flags alone */ 314 /* es->s_uuid will be set by e2fsck if empty */ 315 316 /* 317 * The rest of the superblock fields should be zero, and if not it 318 * means they are likely already in use, so leave them alone. We 319 * can leave it up to e2fsck to clean up any inconsistencies there. 
320 */ 321} 322 323/* 324 * Open the external journal device 325 */ 326static struct block_device *ext3_blkdev_get(dev_t dev) 327{ 328 struct block_device *bdev; 329 char b[BDEVNAME_SIZE]; 330 331 bdev = open_by_devnum(dev, FMODE_READ|FMODE_WRITE); 332 if (IS_ERR(bdev)) 333 goto fail; 334 return bdev; 335 336fail: 337 printk(KERN_ERR "EXT3: failed to open journal device %s: %ld\n", 338 __bdevname(dev, b), PTR_ERR(bdev)); 339 return NULL; 340} 341 342/* 343 * Release the journal device 344 */ 345static int ext3_blkdev_put(struct block_device *bdev) 346{ 347 bd_release(bdev); 348 return blkdev_put(bdev); 349} 350 351static int ext3_blkdev_remove(struct ext3_sb_info *sbi) 352{ 353 struct block_device *bdev; 354 int ret = -ENODEV; 355 356 bdev = sbi->journal_bdev; 357 if (bdev) { 358 ret = ext3_blkdev_put(bdev); 359 sbi->journal_bdev = NULL; 360 } 361 return ret; 362} 363 364static inline struct inode *orphan_list_entry(struct list_head *l) 365{ 366 return &list_entry(l, struct ext3_inode_info, i_orphan)->vfs_inode; 367} 368 369static void dump_orphan_list(struct super_block *sb, struct ext3_sb_info *sbi) 370{ 371 struct list_head *l; 372 373 printk(KERN_ERR "sb orphan head is %d\n", 374 le32_to_cpu(sbi->s_es->s_last_orphan)); 375 376 printk(KERN_ERR "sb_info orphan list:\n"); 377 list_for_each(l, &sbi->s_orphan) { 378 struct inode *inode = orphan_list_entry(l); 379 printk(KERN_ERR " " 380 "inode %s:%lu at %p: mode %o, nlink %d, next %d\n", 381 inode->i_sb->s_id, inode->i_ino, inode, 382 inode->i_mode, inode->i_nlink, 383 NEXT_ORPHAN(inode)); 384 } 385} 386 387static void ext3_put_super (struct super_block * sb) 388{ 389 struct ext3_sb_info *sbi = EXT3_SB(sb); 390 struct ext3_super_block *es = sbi->s_es; 391 int i; 392 393 ext3_xattr_put_super(sb); 394 journal_destroy(sbi->s_journal); 395 if (!(sb->s_flags & MS_RDONLY)) { 396 EXT3_CLEAR_INCOMPAT_FEATURE(sb, EXT3_FEATURE_INCOMPAT_RECOVER); 397 es->s_state = cpu_to_le16(sbi->s_mount_state); 398 BUFFER_TRACE(sbi->s_sbh, 
"marking dirty"); 399 mark_buffer_dirty(sbi->s_sbh); 400 ext3_commit_super(sb, es, 1); 401 } 402 403 for (i = 0; i < sbi->s_gdb_count; i++) 404 brelse(sbi->s_group_desc[i]); 405 kfree(sbi->s_group_desc); 406 percpu_counter_destroy(&sbi->s_freeblocks_counter); 407 percpu_counter_destroy(&sbi->s_freeinodes_counter); 408 percpu_counter_destroy(&sbi->s_dirs_counter); 409 brelse(sbi->s_sbh); 410#ifdef CONFIG_QUOTA 411 for (i = 0; i < MAXQUOTAS; i++) 412 kfree(sbi->s_qf_names[i]); 413#endif 414 415 /* Debugging code just in case the in-memory inode orphan list 416 * isn't empty. The on-disk one can be non-empty if we've 417 * detected an error and taken the fs readonly, but the 418 * in-memory list had better be clean by this point. */ 419 if (!list_empty(&sbi->s_orphan)) 420 dump_orphan_list(sb, sbi); 421 J_ASSERT(list_empty(&sbi->s_orphan)); 422 423 invalidate_bdev(sb->s_bdev); 424 if (sbi->journal_bdev && sbi->journal_bdev != sb->s_bdev) { 425 /* 426 * Invalidate the journal device's buffers. We don't want them 427 * floating about in memory - the physical journal device may 428 * hotswapped, and it breaks the `ro-after' testing code. 
429 */ 430 sync_blockdev(sbi->journal_bdev); 431 invalidate_bdev(sbi->journal_bdev); 432 ext3_blkdev_remove(sbi); 433 } 434 sb->s_fs_info = NULL; 435 kfree(sbi); 436 return; 437} 438 439static struct kmem_cache *ext3_inode_cachep; 440 441/* 442 * Called inside transaction, so use GFP_NOFS 443 */ 444static struct inode *ext3_alloc_inode(struct super_block *sb) 445{ 446 struct ext3_inode_info *ei; 447 448 ei = kmem_cache_alloc(ext3_inode_cachep, GFP_NOFS); 449 if (!ei) 450 return NULL; 451#ifdef CONFIG_EXT3_FS_POSIX_ACL 452 ei->i_acl = EXT3_ACL_NOT_CACHED; 453 ei->i_default_acl = EXT3_ACL_NOT_CACHED; 454#endif 455 ei->i_block_alloc_info = NULL; 456 ei->vfs_inode.i_version = 1; 457 return &ei->vfs_inode; 458} 459 460static void ext3_destroy_inode(struct inode *inode) 461{ 462 kmem_cache_free(ext3_inode_cachep, EXT3_I(inode)); 463} 464 465static void init_once(void * foo, struct kmem_cache * cachep, unsigned long flags) 466{ 467 struct ext3_inode_info *ei = (struct ext3_inode_info *) foo; 468 469 INIT_LIST_HEAD(&ei->i_orphan); 470#ifdef CONFIG_EXT3_FS_XATTR 471 init_rwsem(&ei->xattr_sem); 472#endif 473 mutex_init(&ei->truncate_mutex); 474 inode_init_once(&ei->vfs_inode); 475} 476 477static int init_inodecache(void) 478{ 479 ext3_inode_cachep = kmem_cache_create("ext3_inode_cache", 480 sizeof(struct ext3_inode_info), 481 0, (SLAB_RECLAIM_ACCOUNT| 482 SLAB_MEM_SPREAD), 483 init_once, NULL); 484 if (ext3_inode_cachep == NULL) 485 return -ENOMEM; 486 return 0; 487} 488 489static void destroy_inodecache(void) 490{ 491 kmem_cache_destroy(ext3_inode_cachep); 492} 493 494static void ext3_clear_inode(struct inode *inode) 495{ 496 struct ext3_block_alloc_info *rsv = EXT3_I(inode)->i_block_alloc_info; 497#ifdef CONFIG_EXT3_FS_POSIX_ACL 498 if (EXT3_I(inode)->i_acl && 499 EXT3_I(inode)->i_acl != EXT3_ACL_NOT_CACHED) { 500 posix_acl_release(EXT3_I(inode)->i_acl); 501 EXT3_I(inode)->i_acl = EXT3_ACL_NOT_CACHED; 502 } 503 if (EXT3_I(inode)->i_default_acl && 504 
EXT3_I(inode)->i_default_acl != EXT3_ACL_NOT_CACHED) { 505 posix_acl_release(EXT3_I(inode)->i_default_acl); 506 EXT3_I(inode)->i_default_acl = EXT3_ACL_NOT_CACHED; 507 } 508#endif 509 ext3_discard_reservation(inode); 510 EXT3_I(inode)->i_block_alloc_info = NULL; 511 if (unlikely(rsv)) 512 kfree(rsv); 513} 514 515static inline void ext3_show_quota_options(struct seq_file *seq, struct super_block *sb) 516{ 517#if defined(CONFIG_QUOTA) 518 struct ext3_sb_info *sbi = EXT3_SB(sb); 519 520 if (sbi->s_jquota_fmt) 521 seq_printf(seq, ",jqfmt=%s", 522 (sbi->s_jquota_fmt == QFMT_VFS_OLD) ? "vfsold": "vfsv0"); 523 524 if (sbi->s_qf_names[USRQUOTA]) 525 seq_printf(seq, ",usrjquota=%s", sbi->s_qf_names[USRQUOTA]); 526 527 if (sbi->s_qf_names[GRPQUOTA]) 528 seq_printf(seq, ",grpjquota=%s", sbi->s_qf_names[GRPQUOTA]); 529 530 if (sbi->s_mount_opt & EXT3_MOUNT_USRQUOTA) 531 seq_puts(seq, ",usrquota"); 532 533 if (sbi->s_mount_opt & EXT3_MOUNT_GRPQUOTA) 534 seq_puts(seq, ",grpquota"); 535#endif 536} 537 538static int ext3_show_options(struct seq_file *seq, struct vfsmount *vfs) 539{ 540 struct super_block *sb = vfs->mnt_sb; 541 542 if (test_opt(sb, DATA_FLAGS) == EXT3_MOUNT_JOURNAL_DATA) 543 seq_puts(seq, ",data=journal"); 544 else if (test_opt(sb, DATA_FLAGS) == EXT3_MOUNT_ORDERED_DATA) 545 seq_puts(seq, ",data=ordered"); 546 else if (test_opt(sb, DATA_FLAGS) == EXT3_MOUNT_WRITEBACK_DATA) 547 seq_puts(seq, ",data=writeback"); 548 549 ext3_show_quota_options(seq, sb); 550 551 return 0; 552} 553 554 555static struct dentry *ext3_get_dentry(struct super_block *sb, void *vobjp) 556{ 557 __u32 *objp = vobjp; 558 unsigned long ino = objp[0]; 559 __u32 generation = objp[1]; 560 struct inode *inode; 561 struct dentry *result; 562 563 if (ino < EXT3_FIRST_INO(sb) && ino != EXT3_ROOT_INO) 564 return ERR_PTR(-ESTALE); 565 if (ino > le32_to_cpu(EXT3_SB(sb)->s_es->s_inodes_count)) 566 return ERR_PTR(-ESTALE); 567 568 /* iget isn't really right if the inode is currently unallocated!! 
*
	 * ext3_read_inode will return a bad_inode if the inode had been
	 * deleted, so we should be safe.
	 *
	 * Currently we don't know the generation for parent directory, so
	 * a generation of 0 means "accept any"
	 */
	inode = iget(sb, ino);
	if (inode == NULL)
		return ERR_PTR(-ENOMEM);
	/* Reject bad inodes and, when one was supplied, a stale generation. */
	if (is_bad_inode(inode) ||
	    (generation && inode->i_generation != generation)) {
		iput(inode);
		return ERR_PTR(-ESTALE);
	}
	/* now to find a dentry.
	 * If possible, get a well-connected one
	 */
	result = d_alloc_anon(inode);
	if (!result) {
		/* Drop the inode reference taken by iget() above. */
		iput(inode);
		return ERR_PTR(-ENOMEM);
	}
	return result;
}

#ifdef CONFIG_QUOTA
/* Human-readable name of a quota type, for log messages. */
#define QTYPE2NAME(t) ((t)==USRQUOTA?"user":"group")
/* Paste prefix `on` onto USRJQUOTA or GRPJQUOTA depending on the type. */
#define QTYPE2MOPT(on, t) ((t)==USRQUOTA?((on)##USRJQUOTA):((on)##GRPJQUOTA))

/* Quota callbacks implemented elsewhere in this file. */
static int ext3_dquot_initialize(struct inode *inode, int type);
static int ext3_dquot_drop(struct inode *inode);
static int ext3_write_dquot(struct dquot *dquot);
static int ext3_acquire_dquot(struct dquot *dquot);
static int ext3_release_dquot(struct dquot *dquot);
static int ext3_mark_dquot_dirty(struct dquot *dquot);
static int ext3_write_info(struct super_block *sb, int type);
static int ext3_quota_on(struct super_block *sb, int type, int format_id, char *path);
static int ext3_quota_on_mount(struct super_block *sb, int type);
static ssize_t ext3_quota_read(struct super_block *sb, int type, char *data,
			       size_t len, loff_t off);
static ssize_t ext3_quota_write(struct super_block *sb, int type,
				const char *data, size_t len, loff_t off);

/*
 * dquot operations: a mix of ext3-specific hooks and the generic dquot_*
 * helpers for space/inode accounting and ownership transfer.
 */
static struct dquot_operations ext3_quota_operations = {
	.initialize	= ext3_dquot_initialize,
	.drop		= ext3_dquot_drop,
	.alloc_space	= dquot_alloc_space,
	.alloc_inode	= dquot_alloc_inode,
	.free_space	= dquot_free_space,
	.free_inode	= dquot_free_inode,
	.transfer	= dquot_transfer,
	.write_dquot	= ext3_write_dquot,
	.acquire_dquot	=
ext3_acquire_dquot,
	.release_dquot	= ext3_release_dquot,
	.mark_dirty	= ext3_mark_dquot_dirty,
	.write_info	= ext3_write_info
};

/* quotactl entry points: only quota_on is ext3-specific, the rest are vfs_*. */
static struct quotactl_ops ext3_qctl_operations = {
	.quota_on	= ext3_quota_on,
	.quota_off	= vfs_quota_off,
	.quota_sync	= vfs_quota_sync,
	.get_info	= vfs_get_dqinfo,
	.set_info	= vfs_set_dqinfo,
	.get_dqblk	= vfs_get_dqblk,
	.set_dqblk	= vfs_set_dqblk
};
#endif

/* Superblock operations table for ext3. */
static const struct super_operations ext3_sops = {
	.alloc_inode	= ext3_alloc_inode,
	.destroy_inode	= ext3_destroy_inode,
	.read_inode	= ext3_read_inode,
	.write_inode	= ext3_write_inode,
	.dirty_inode	= ext3_dirty_inode,
	.delete_inode	= ext3_delete_inode,
	.put_super	= ext3_put_super,
	.write_super	= ext3_write_super,
	.sync_fs	= ext3_sync_fs,
	.write_super_lockfs = ext3_write_super_lockfs,
	.unlockfs	= ext3_unlockfs,
	.statfs		= ext3_statfs,
	.remount_fs	= ext3_remount,
	.clear_inode	= ext3_clear_inode,
	.show_options	= ext3_show_options,
#ifdef CONFIG_QUOTA
	/* Quota file I/O hooks exist only when quota support is built in. */
	.quota_read	= ext3_quota_read,
	.quota_write	= ext3_quota_write,
#endif
};

/* Export operations table: parent lookup and handle-to-dentry lookup. */
static struct export_operations ext3_export_ops = {
	.get_parent = ext3_get_parent,
	.get_dentry = ext3_get_dentry,
};

/* Mount option identifiers; these are the token values switched on in parse_options(). */
enum {
	Opt_bsd_df, Opt_minix_df, Opt_grpid, Opt_nogrpid,
	Opt_resgid, Opt_resuid, Opt_sb, Opt_err_cont, Opt_err_panic, Opt_err_ro,
	Opt_nouid32, Opt_nocheck, Opt_debug, Opt_oldalloc, Opt_orlov,
	Opt_user_xattr, Opt_nouser_xattr, Opt_acl, Opt_noacl,
	Opt_reservation, Opt_noreservation, Opt_noload, Opt_nobh, Opt_bh,
	Opt_commit, Opt_journal_update, Opt_journal_inum, Opt_journal_dev,
	Opt_abort, Opt_data_journal, Opt_data_ordered, Opt_data_writeback,
	Opt_usrjquota, Opt_grpjquota, Opt_offusrjquota, Opt_offgrpjquota,
	Opt_jqfmt_vfsold, Opt_jqfmt_vfsv0, Opt_quota, Opt_noquota,
	Opt_ignore, Opt_barrier, Opt_err, Opt_resize, Opt_usrquota,
	Opt_grpquota
};

680static match_table_t tokens = { 681 {Opt_bsd_df, "bsddf"}, 682 {Opt_minix_df, "minixdf"}, 683 {Opt_grpid, "grpid"}, 684 {Opt_grpid, "bsdgroups"}, 685 {Opt_nogrpid, "nogrpid"}, 686 {Opt_nogrpid, "sysvgroups"}, 687 {Opt_resgid, "resgid=%u"}, 688 {Opt_resuid, "resuid=%u"}, 689 {Opt_sb, "sb=%u"}, 690 {Opt_err_cont, "errors=continue"}, 691 {Opt_err_panic, "errors=panic"}, 692 {Opt_err_ro, "errors=remount-ro"}, 693 {Opt_nouid32, "nouid32"}, 694 {Opt_nocheck, "nocheck"}, 695 {Opt_nocheck, "check=none"}, 696 {Opt_debug, "debug"}, 697 {Opt_oldalloc, "oldalloc"}, 698 {Opt_orlov, "orlov"}, 699 {Opt_user_xattr, "user_xattr"}, 700 {Opt_nouser_xattr, "nouser_xattr"}, 701 {Opt_acl, "acl"}, 702 {Opt_noacl, "noacl"}, 703 {Opt_reservation, "reservation"}, 704 {Opt_noreservation, "noreservation"}, 705 {Opt_noload, "noload"}, 706 {Opt_nobh, "nobh"}, 707 {Opt_bh, "bh"}, 708 {Opt_commit, "commit=%u"}, 709 {Opt_journal_update, "journal=update"}, 710 {Opt_journal_inum, "journal=%u"}, 711 {Opt_journal_dev, "journal_dev=%u"}, 712 {Opt_abort, "abort"}, 713 {Opt_data_journal, "data=journal"}, 714 {Opt_data_ordered, "data=ordered"}, 715 {Opt_data_writeback, "data=writeback"}, 716 {Opt_offusrjquota, "usrjquota="}, 717 {Opt_usrjquota, "usrjquota=%s"}, 718 {Opt_offgrpjquota, "grpjquota="}, 719 {Opt_grpjquota, "grpjquota=%s"}, 720 {Opt_jqfmt_vfsold, "jqfmt=vfsold"}, 721 {Opt_jqfmt_vfsv0, "jqfmt=vfsv0"}, 722 {Opt_grpquota, "grpquota"}, 723 {Opt_noquota, "noquota"}, 724 {Opt_quota, "quota"}, 725 {Opt_usrquota, "usrquota"}, 726 {Opt_barrier, "barrier=%u"}, 727 {Opt_err, NULL}, 728 {Opt_resize, "resize"}, 729}; 730 731static ext3_fsblk_t get_sb_block(void **data) 732{ 733 ext3_fsblk_t sb_block; 734 char *options = (char *) *data; 735 736 if (!options || strncmp(options, "sb=", 3) != 0) 737 return 1; /* Default location */ 738 options += 3; 739 /*todo: use simple_strtoll with >32bit ext3 */ 740 sb_block = simple_strtoul(options, &options, 0); 741 if (*options && *options != ',') { 742 
printk("EXT3-fs: Invalid sb specification: %s\n",
		       (char *) *data);
		return 1;
	}
	/* Step over the separator so the remaining options start at *data. */
	if (*options == ',')
		options++;
	*data = (void *) options;
	return sb_block;
}

/*
 * parse_options - parse a comma-separated mount option string.
 *
 * @options:        option string; split in place by strsep(), may be NULL
 * @sb:             superblock whose ext3_sb_info receives the settings
 * @inum:           out: journal inode number ("journal=%u")
 * @journal_devnum: out: journal device number ("journal_dev=%u")
 * @n_blocks_count: out: new block count for the resize option
 * @is_remount:     non-zero when called for a remount; journal selection
 *                  and data-mode changes are then refused, and resize is
 *                  only accepted in this case
 *
 * Returns 1 when every option was accepted (or @options is NULL) and 0 as
 * soon as one option is rejected.  Options parsed before a rejected one
 * have already been applied to sbi at that point.
 */
static int parse_options (char *options, struct super_block *sb,
			  unsigned int *inum, unsigned long *journal_devnum,
			  ext3_fsblk_t *n_blocks_count, int is_remount)
{
	struct ext3_sb_info *sbi = EXT3_SB(sb);
	char * p;
	substring_t args[MAX_OPT_ARGS];
	int data_opt = 0;
	int option;
#ifdef CONFIG_QUOTA
	int qtype;
	char *qname;
#endif

	if (!options)
		return 1;

	while ((p = strsep (&options, ",")) != NULL) {
		int token;
		/* Skip empty fields such as the one produced by ",,". */
		if (!*p)
			continue;

		token = match_token(p, tokens, args);
		switch (token) {
		case Opt_bsd_df:
			clear_opt (sbi->s_mount_opt, MINIX_DF);
			break;
		case Opt_minix_df:
			set_opt (sbi->s_mount_opt, MINIX_DF);
			break;
		case Opt_grpid:
			set_opt (sbi->s_mount_opt, GRPID);
			break;
		case Opt_nogrpid:
			clear_opt (sbi->s_mount_opt, GRPID);
			break;
		case Opt_resuid:
			if (match_int(&args[0], &option))
				return 0;
			sbi->s_resuid = option;
			break;
		case Opt_resgid:
			if (match_int(&args[0], &option))
				return 0;
			sbi->s_resgid = option;
			break;
		case Opt_sb:
			/* handled by get_sb_block() instead of here */
			/* *sb_block = match_int(&args[0]); */
			break;
		/* The three errors= policies are mutually exclusive. */
		case Opt_err_panic:
			clear_opt (sbi->s_mount_opt, ERRORS_CONT);
			clear_opt (sbi->s_mount_opt, ERRORS_RO);
			set_opt (sbi->s_mount_opt, ERRORS_PANIC);
			break;
		case Opt_err_ro:
			clear_opt (sbi->s_mount_opt, ERRORS_CONT);
			clear_opt (sbi->s_mount_opt, ERRORS_PANIC);
			set_opt (sbi->s_mount_opt, ERRORS_RO);
			break;
		case Opt_err_cont:
			clear_opt (sbi->s_mount_opt, ERRORS_RO);
			clear_opt (sbi->s_mount_opt, ERRORS_PANIC);
			set_opt (sbi->s_mount_opt, ERRORS_CONT);
			break;
		case Opt_nouid32:
			set_opt (sbi->s_mount_opt, NO_UID32);
			break;
		case Opt_nocheck:
			clear_opt (sbi->s_mount_opt, CHECK);
			break;
		case Opt_debug:
			set_opt (sbi->s_mount_opt, DEBUG);
			break;
		case Opt_oldalloc:
			set_opt (sbi->s_mount_opt, OLDALLOC);
			break;
		case Opt_orlov:
			clear_opt (sbi->s_mount_opt, OLDALLOC);
			break;
#ifdef CONFIG_EXT3_FS_XATTR
		case Opt_user_xattr:
			set_opt (sbi->s_mount_opt, XATTR_USER);
			break;
		case Opt_nouser_xattr:
			clear_opt (sbi->s_mount_opt, XATTR_USER);
			break;
#else
		/* Without xattr support the options are accepted but only logged. */
		case Opt_user_xattr:
		case Opt_nouser_xattr:
			printk("EXT3 (no)user_xattr options not supported\n");
			break;
#endif
#ifdef CONFIG_EXT3_FS_POSIX_ACL
		case Opt_acl:
			set_opt(sbi->s_mount_opt, POSIX_ACL);
			break;
		case Opt_noacl:
			clear_opt(sbi->s_mount_opt, POSIX_ACL);
			break;
#else
		/* Without ACL support the options are accepted but only logged. */
		case Opt_acl:
		case Opt_noacl:
			printk("EXT3 (no)acl options not supported\n");
			break;
#endif
		case Opt_reservation:
			set_opt(sbi->s_mount_opt, RESERVATION);
			break;
		case Opt_noreservation:
			clear_opt(sbi->s_mount_opt, RESERVATION);
			break;
		case Opt_journal_update:
			/* @@@ FIXME */
			/* Eventually we will want to be able to create
			   a journal file here.  For now, only allow the
			   user to specify an existing inode to be the
			   journal file. */
			if (is_remount) {
				printk(KERN_ERR "EXT3-fs: cannot specify "
				       "journal on remount\n");
				return 0;
			}
			set_opt (sbi->s_mount_opt, UPDATE_JOURNAL);
			break;
		case Opt_journal_inum:
			if (is_remount) {
				printk(KERN_ERR "EXT3-fs: cannot specify "
				       "journal on remount\n");
				return 0;
			}
			if (match_int(&args[0], &option))
				return 0;
			*inum = option;
			break;
		case Opt_journal_dev:
			if (is_remount) {
				printk(KERN_ERR "EXT3-fs: cannot specify "
				       "journal on remount\n");
				return 0;
			}
			if (match_int(&args[0], &option))
				return 0;
			*journal_devnum = option;
			break;
		case Opt_noload:
			set_opt (sbi->s_mount_opt, NOLOAD);
			break;
		case Opt_commit:
			if (match_int(&args[0], &option))
				return 0;
			if (option < 0)
				return 0;
			/* commit=0 selects the JBD default commit age. */
			if (option == 0)
				option = JBD_DEFAULT_MAX_COMMIT_AGE;
			sbi->s_commit_interval = HZ * option;
			break;
		/* All three data= modes funnel into the shared datacheck code. */
		case Opt_data_journal:
			data_opt = EXT3_MOUNT_JOURNAL_DATA;
			goto datacheck;
		case Opt_data_ordered:
			data_opt = EXT3_MOUNT_ORDERED_DATA;
			goto datacheck;
		case Opt_data_writeback:
			data_opt = EXT3_MOUNT_WRITEBACK_DATA;
		datacheck:
			if (is_remount) {
				/* On remount the mode may be restated but not changed. */
				if ((sbi->s_mount_opt & EXT3_MOUNT_DATA_FLAGS)
						!= data_opt) {
					printk(KERN_ERR
						"EXT3-fs: cannot change data "
						"mode on remount\n");
					return 0;
				}
			} else {
				sbi->s_mount_opt &= ~EXT3_MOUNT_DATA_FLAGS;
				sbi->s_mount_opt |= data_opt;
			}
			break;
#ifdef CONFIG_QUOTA
		case Opt_usrjquota:
			qtype = USRQUOTA;
			goto set_qf_name;
		case Opt_grpjquota:
			qtype = GRPQUOTA;
set_qf_name:
			if (sb_any_quota_enabled(sb)) {
				printk(KERN_ERR
					"EXT3-fs: Cannot change journalled "
					"quota options when quota turned on.\n");
				return 0;
			}
			qname = match_strdup(&args[0]);
			if (!qname) {
				printk(KERN_ERR
					"EXT3-fs: not enough memory for "
					"storing quotafile name.\n");
				return 0;
			}
			/* A different name than the one already stored is refused. */
			if (sbi->s_qf_names[qtype] &&
			    strcmp(sbi->s_qf_names[qtype], qname)) {
				printk(KERN_ERR
					"EXT3-fs: %s quota file already "
					"specified.\n", QTYPE2NAME(qtype));
				kfree(qname);
				return 0;
			}
			/*
			 * NOTE(review): if the same name was already stored, the
			 * old pointer is overwritten here without being freed -
			 * looks like a small leak; confirm.
			 */
			sbi->s_qf_names[qtype] = qname;
			if (strchr(sbi->s_qf_names[qtype], '/')) {
				printk(KERN_ERR
					"EXT3-fs: quotafile must be on "
					"filesystem root.\n");
				kfree(sbi->s_qf_names[qtype]);
				sbi->s_qf_names[qtype] = NULL;
				return 0;
			}
			set_opt(sbi->s_mount_opt, QUOTA);
			break;
		case Opt_offusrjquota:
			qtype = USRQUOTA;
			goto clear_qf_name;
		case Opt_offgrpjquota:
			qtype = GRPQUOTA;
clear_qf_name:
			if (sb_any_quota_enabled(sb)) {
				printk(KERN_ERR "EXT3-fs: Cannot change "
					"journalled quota options when "
					"quota turned on.\n");
				return 0;
			}
			/*
			 * The space will be released later when all options
			 * are confirmed to be correct
			 */
			sbi->s_qf_names[qtype] = NULL;
			break;
		case Opt_jqfmt_vfsold:
			sbi->s_jquota_fmt = QFMT_VFS_OLD;
			break;
		case Opt_jqfmt_vfsv0:
			sbi->s_jquota_fmt = QFMT_VFS_V0;
			break;
		case Opt_quota:
		case Opt_usrquota:
			set_opt(sbi->s_mount_opt, QUOTA);
			set_opt(sbi->s_mount_opt, USRQUOTA);
			break;
		case Opt_grpquota:
			set_opt(sbi->s_mount_opt, QUOTA);
			set_opt(sbi->s_mount_opt, GRPQUOTA);
			break;
		case Opt_noquota:
			if (sb_any_quota_enabled(sb)) {
				printk(KERN_ERR "EXT3-fs: Cannot change quota "
					"options when quota turned on.\n");
				return 0;
			}
			clear_opt(sbi->s_mount_opt, QUOTA);
			clear_opt(sbi->s_mount_opt, USRQUOTA);
			clear_opt(sbi->s_mount_opt, GRPQUOTA);
			break;
#else
		/* Without quota support these are logged and otherwise ignored. */
		case Opt_quota:
		case Opt_usrquota:
		case Opt_grpquota:
		case Opt_usrjquota:
		case Opt_grpjquota:
		case Opt_offusrjquota:
		case Opt_offgrpjquota:
		case Opt_jqfmt_vfsold:
		case Opt_jqfmt_vfsv0:
			printk(KERN_ERR
				"EXT3-fs: journalled quota options not "
				"supported.\n");
			break;
		case Opt_noquota:
			break;
#endif
		case Opt_abort:
			set_opt(sbi->s_mount_opt, ABORT);
			break;
		case Opt_barrier:
			if (match_int(&args[0], &option))
				return 0;
			if (option)
				set_opt(sbi->s_mount_opt, BARRIER);
			else
				clear_opt(sbi->s_mount_opt, BARRIER);
			break;
		case Opt_ignore:
			break;
		case Opt_resize:
			if (!is_remount) {
				printk("EXT3-fs: resize option only available "
					"for remount\n");
				return 0;
			}
			if (match_int(&args[0], &option) != 0)
				return 0;
			*n_blocks_count = option;
			break;
		case Opt_nobh:
			set_opt(sbi->s_mount_opt, NOBH);
			break;
		case Opt_bh:
			clear_opt(sbi->s_mount_opt, NOBH);
			break;
		default:
			printk (KERN_ERR
				"EXT3-fs: Unrecognized mount option \"%s\" "
				"or missing value\n", p);
			return 0;
		}
	}
#ifdef CONFIG_QUOTA
	/* Cross-check the quota options once the whole string is parsed. */
	if (sbi->s_qf_names[USRQUOTA] || sbi->s_qf_names[GRPQUOTA]) {
		/* A journalled quota file supersedes the plain flag of its type. */
		if ((sbi->s_mount_opt & EXT3_MOUNT_USRQUOTA) &&
		     sbi->s_qf_names[USRQUOTA])
			clear_opt(sbi->s_mount_opt, USRQUOTA);

		if ((sbi->s_mount_opt & EXT3_MOUNT_GRPQUOTA) &&
		     sbi->s_qf_names[GRPQUOTA])
			clear_opt(sbi->s_mount_opt, GRPQUOTA);

		/* Journalled quota for one type plus plain quota for the other. */
		if ((sbi->s_qf_names[USRQUOTA] &&
				(sbi->s_mount_opt & EXT3_MOUNT_GRPQUOTA)) ||
		    (sbi->s_qf_names[GRPQUOTA] &&
				(sbi->s_mount_opt & EXT3_MOUNT_USRQUOTA))) {
			printk(KERN_ERR "EXT3-fs: old and new quota "
					"format mixing.\n");
			return 0;
		}

		if (!sbi->s_jquota_fmt) {
			printk(KERN_ERR "EXT3-fs: journalled quota format "
					"not specified.\n");
			return 0;
		}
	} else {
		if (sbi->s_jquota_fmt) {
			printk(KERN_ERR "EXT3-fs: journalled quota format "
					"specified with no journalling "
					"enabled.\n");
			return 0;
		}
	}
#endif
	return 1;
}

/*
 * ext3_setup_super - mount-time superblock bookkeeping.
 *
 * Returns MS_RDONLY when the on-disk revision level is above
 * EXT3_MAX_SUPP_REV, otherwise 0.
 */
static int ext3_setup_super(struct super_block *sb, struct ext3_super_block *es,
1102 int read_only) 1103{ 1104 struct ext3_sb_info *sbi = EXT3_SB(sb); 1105 int res = 0; 1106 1107 if (le32_to_cpu(es->s_rev_level) > EXT3_MAX_SUPP_REV) { 1108 printk (KERN_ERR "EXT3-fs warning: revision level too high, " 1109 "forcing read-only mode\n"); 1110 res = MS_RDONLY; 1111 } 1112 if (read_only) 1113 return res; 1114 if (!(sbi->s_mount_state & EXT3_VALID_FS)) 1115 printk (KERN_WARNING "EXT3-fs warning: mounting unchecked fs, " 1116 "running e2fsck is recommended\n"); 1117 else if ((sbi->s_mount_state & EXT3_ERROR_FS)) 1118 printk (KERN_WARNING 1119 "EXT3-fs warning: mounting fs with errors, " 1120 "running e2fsck is recommended\n"); 1121 else if ((__s16) le16_to_cpu(es->s_max_mnt_count) >= 0 && 1122 le16_to_cpu(es->s_mnt_count) >= 1123 (unsigned short) (__s16) le16_to_cpu(es->s_max_mnt_count)) 1124 printk (KERN_WARNING 1125 "EXT3-fs warning: maximal mount count reached, " 1126 "running e2fsck is recommended\n"); 1127 else if (le32_to_cpu(es->s_checkinterval) && 1128 (le32_to_cpu(es->s_lastcheck) + 1129 le32_to_cpu(es->s_checkinterval) <= get_seconds())) 1130 printk (KERN_WARNING 1131 "EXT3-fs warning: checktime reached, " 1132 "running e2fsck is recommended\n"); 1133 if (!(__s16) le16_to_cpu(es->s_max_mnt_count)) 1134 es->s_max_mnt_count = cpu_to_le16(EXT3_DFL_MAX_MNT_COUNT); 1135 es->s_mnt_count=cpu_to_le16(le16_to_cpu(es->s_mnt_count) + 1); 1136 es->s_mtime = cpu_to_le32(get_seconds()); 1137 ext3_update_dynamic_rev(sb); 1138 EXT3_SET_INCOMPAT_FEATURE(sb, EXT3_FEATURE_INCOMPAT_RECOVER); 1139 1140 ext3_commit_super(sb, es, 1); 1141 if (test_opt(sb, DEBUG)) 1142 printk(KERN_INFO "[EXT3 FS bs=%lu, gc=%lu, " 1143 "bpg=%lu, ipg=%lu, mo=%04lx]\n", 1144 sb->s_blocksize, 1145 sbi->s_groups_count, 1146 EXT3_BLOCKS_PER_GROUP(sb), 1147 EXT3_INODES_PER_GROUP(sb), 1148 sbi->s_mount_opt); 1149 1150 printk(KERN_INFO "EXT3 FS on %s, ", sb->s_id); 1151 if (EXT3_SB(sb)->s_journal->j_inode == NULL) { 1152 char b[BDEVNAME_SIZE]; 1153 1154 printk("external journal on %s\n", 
1155 bdevname(EXT3_SB(sb)->s_journal->j_dev, b)); 1156 } else { 1157 printk("internal journal\n"); 1158 } 1159 return res; 1160} 1161 1162/* Called at mount-time, super-block is locked */ 1163static int ext3_check_descriptors (struct super_block * sb) 1164{ 1165 struct ext3_sb_info *sbi = EXT3_SB(sb); 1166 ext3_fsblk_t first_block = le32_to_cpu(sbi->s_es->s_first_data_block); 1167 ext3_fsblk_t last_block; 1168 struct ext3_group_desc * gdp = NULL; 1169 int desc_block = 0; 1170 int i; 1171 1172 ext3_debug ("Checking group descriptors"); 1173 1174 for (i = 0; i < sbi->s_groups_count; i++) 1175 { 1176 if (i == sbi->s_groups_count - 1) 1177 last_block = le32_to_cpu(sbi->s_es->s_blocks_count) - 1; 1178 else 1179 last_block = first_block + 1180 (EXT3_BLOCKS_PER_GROUP(sb) - 1); 1181 1182 if ((i % EXT3_DESC_PER_BLOCK(sb)) == 0) 1183 gdp = (struct ext3_group_desc *) 1184 sbi->s_group_desc[desc_block++]->b_data; 1185 if (le32_to_cpu(gdp->bg_block_bitmap) < first_block || 1186 le32_to_cpu(gdp->bg_block_bitmap) > last_block) 1187 { 1188 ext3_error (sb, "ext3_check_descriptors", 1189 "Block bitmap for group %d" 1190 " not in group (block %lu)!", 1191 i, (unsigned long) 1192 le32_to_cpu(gdp->bg_block_bitmap)); 1193 return 0; 1194 } 1195 if (le32_to_cpu(gdp->bg_inode_bitmap) < first_block || 1196 le32_to_cpu(gdp->bg_inode_bitmap) > last_block) 1197 { 1198 ext3_error (sb, "ext3_check_descriptors", 1199 "Inode bitmap for group %d" 1200 " not in group (block %lu)!", 1201 i, (unsigned long) 1202 le32_to_cpu(gdp->bg_inode_bitmap)); 1203 return 0; 1204 } 1205 if (le32_to_cpu(gdp->bg_inode_table) < first_block || 1206 le32_to_cpu(gdp->bg_inode_table) + sbi->s_itb_per_group > 1207 last_block) 1208 { 1209 ext3_error (sb, "ext3_check_descriptors", 1210 "Inode table for group %d" 1211 " not in group (block %lu)!", 1212 i, (unsigned long) 1213 le32_to_cpu(gdp->bg_inode_table)); 1214 return 0; 1215 } 1216 first_block += EXT3_BLOCKS_PER_GROUP(sb); 1217 gdp++; 1218 } 1219 1220 
sbi->s_es->s_free_blocks_count=cpu_to_le32(ext3_count_free_blocks(sb)); 1221 sbi->s_es->s_free_inodes_count=cpu_to_le32(ext3_count_free_inodes(sb)); 1222 return 1; 1223} 1224 1225 1226/* ext3_orphan_cleanup() walks a singly-linked list of inodes (starting at 1227 * the superblock) which were deleted from all directories, but held open by 1228 * a process at the time of a crash. We walk the list and try to delete these 1229 * inodes at recovery time (only with a read-write filesystem). 1230 * 1231 * In order to keep the orphan inode chain consistent during traversal (in 1232 * case of crash during recovery), we link each inode into the superblock 1233 * orphan list_head and handle it the same way as an inode deletion during 1234 * normal operation (which journals the operations for us). 1235 * 1236 * We only do an iget() and an iput() on each inode, which is very safe if we 1237 * accidentally point at an in-use or already deleted inode. The worst that 1238 * can happen in this case is that we get a "bit already cleared" message from 1239 * ext3_free_inode(). The only reason we would point at a wrong inode is if 1240 * e2fsck was run on this filesystem, and it must have already done the orphan 1241 * inode cleanup for us, so we can safely abort without any further action. 
1242 */ 1243static void ext3_orphan_cleanup (struct super_block * sb, 1244 struct ext3_super_block * es) 1245{ 1246 unsigned int s_flags = sb->s_flags; 1247 int nr_orphans = 0, nr_truncates = 0; 1248#ifdef CONFIG_QUOTA 1249 int i; 1250#endif 1251 if (!es->s_last_orphan) { 1252 jbd_debug(4, "no orphan inodes to clean up\n"); 1253 return; 1254 } 1255 1256 if (bdev_read_only(sb->s_bdev)) { 1257 printk(KERN_ERR "EXT3-fs: write access " 1258 "unavailable, skipping orphan cleanup.\n"); 1259 return; 1260 } 1261 1262 if (EXT3_SB(sb)->s_mount_state & EXT3_ERROR_FS) { 1263 if (es->s_last_orphan) 1264 jbd_debug(1, "Errors on filesystem, " 1265 "clearing orphan list.\n"); 1266 es->s_last_orphan = 0; 1267 jbd_debug(1, "Skipping orphan recovery on fs with errors.\n"); 1268 return; 1269 } 1270 1271 if (s_flags & MS_RDONLY) { 1272 printk(KERN_INFO "EXT3-fs: %s: orphan cleanup on readonly fs\n", 1273 sb->s_id); 1274 sb->s_flags &= ~MS_RDONLY; 1275 } 1276#ifdef CONFIG_QUOTA 1277 /* Needed for iput() to work correctly and not trash data */ 1278 sb->s_flags |= MS_ACTIVE; 1279 /* Turn on quotas so that they are updated correctly */ 1280 for (i = 0; i < MAXQUOTAS; i++) { 1281 if (EXT3_SB(sb)->s_qf_names[i]) { 1282 int ret = ext3_quota_on_mount(sb, i); 1283 if (ret < 0) 1284 printk(KERN_ERR 1285 "EXT3-fs: Cannot turn on journalled " 1286 "quota: error %d\n", ret); 1287 } 1288 } 1289#endif 1290 1291 while (es->s_last_orphan) { 1292 struct inode *inode; 1293 1294 if (!(inode = 1295 ext3_orphan_get(sb, le32_to_cpu(es->s_last_orphan)))) { 1296 es->s_last_orphan = 0; 1297 break; 1298 } 1299 1300 list_add(&EXT3_I(inode)->i_orphan, &EXT3_SB(sb)->s_orphan); 1301 DQUOT_INIT(inode); 1302 if (inode->i_nlink) { 1303 printk(KERN_DEBUG 1304 "%s: truncating inode %lu to %Ld bytes\n", 1305 __FUNCTION__, inode->i_ino, inode->i_size); 1306 jbd_debug(2, "truncating inode %lu to %Ld bytes\n", 1307 inode->i_ino, inode->i_size); 1308 ext3_truncate(inode); 1309 nr_truncates++; 1310 } else { 1311 
printk(KERN_DEBUG 1312 "%s: deleting unreferenced inode %lu\n", 1313 __FUNCTION__, inode->i_ino); 1314 jbd_debug(2, "deleting unreferenced inode %lu\n", 1315 inode->i_ino); 1316 nr_orphans++; 1317 } 1318 iput(inode); /* The delete magic happens here! */ 1319 } 1320 1321#define PLURAL(x) (x), ((x)==1) ? "" : "s" 1322 1323 if (nr_orphans) 1324 printk(KERN_INFO "EXT3-fs: %s: %d orphan inode%s deleted\n", 1325 sb->s_id, PLURAL(nr_orphans)); 1326 if (nr_truncates) 1327 printk(KERN_INFO "EXT3-fs: %s: %d truncate%s cleaned up\n", 1328 sb->s_id, PLURAL(nr_truncates)); 1329#ifdef CONFIG_QUOTA 1330 /* Turn quotas off */ 1331 for (i = 0; i < MAXQUOTAS; i++) { 1332 if (sb_dqopt(sb)->files[i]) 1333 vfs_quota_off(sb, i); 1334 } 1335#endif 1336 sb->s_flags = s_flags; /* Restore MS_RDONLY status */ 1337} 1338 1339/* 1340 * Maximal file size. There is a direct, and {,double-,triple-}indirect 1341 * block limit, and also a limit of (2^32 - 1) 512-byte sectors in i_blocks. 1342 * We need to be 1 filesystem block less than the 2^32 sector limit. 1343 */ 1344static loff_t ext3_max_size(int bits) 1345{ 1346 loff_t res = EXT3_NDIR_BLOCKS; 1347 /* This constant is calculated to be the largest file size for a 1348 * dense, 4k-blocksize file such that the total number of 1349 * sectors in the file, including data and all indirect blocks, 1350 * does not exceed 2^32. 
*/ 1351 const loff_t upper_limit = 0x1ff7fffd000LL; 1352 1353 res += 1LL << (bits-2); 1354 res += 1LL << (2*(bits-2)); 1355 res += 1LL << (3*(bits-2)); 1356 res <<= bits; 1357 if (res > upper_limit) 1358 res = upper_limit; 1359 return res; 1360} 1361 1362static ext3_fsblk_t descriptor_loc(struct super_block *sb, 1363 ext3_fsblk_t logic_sb_block, 1364 int nr) 1365{ 1366 struct ext3_sb_info *sbi = EXT3_SB(sb); 1367 unsigned long bg, first_meta_bg; 1368 int has_super = 0; 1369 1370 first_meta_bg = le32_to_cpu(sbi->s_es->s_first_meta_bg); 1371 1372 if (!EXT3_HAS_INCOMPAT_FEATURE(sb, EXT3_FEATURE_INCOMPAT_META_BG) || 1373 nr < first_meta_bg) 1374 return (logic_sb_block + nr + 1); 1375 bg = sbi->s_desc_per_block * nr; 1376 if (ext3_bg_has_super(sb, bg)) 1377 has_super = 1; 1378 return (has_super + ext3_group_first_block_no(sb, bg)); 1379} 1380 1381 1382static int ext3_fill_super (struct super_block *sb, void *data, int silent) 1383{ 1384 struct buffer_head * bh; 1385 struct ext3_super_block *es = NULL; 1386 struct ext3_sb_info *sbi; 1387 ext3_fsblk_t block; 1388 ext3_fsblk_t sb_block = get_sb_block(&data); 1389 ext3_fsblk_t logic_sb_block; 1390 unsigned long offset = 0; 1391 unsigned int journal_inum = 0; 1392 unsigned long journal_devnum = 0; 1393 unsigned long def_mount_opts; 1394 struct inode *root; 1395 int blocksize; 1396 int hblock; 1397 int db_count; 1398 int i; 1399 int needs_recovery; 1400 __le32 features; 1401 1402 sbi = kzalloc(sizeof(*sbi), GFP_KERNEL); 1403 if (!sbi) 1404 return -ENOMEM; 1405 sb->s_fs_info = sbi; 1406 sbi->s_mount_opt = 0; 1407 sbi->s_resuid = EXT3_DEF_RESUID; 1408 sbi->s_resgid = EXT3_DEF_RESGID; 1409 1410 unlock_kernel(); 1411 1412 blocksize = sb_min_blocksize(sb, EXT3_MIN_BLOCK_SIZE); 1413 if (!blocksize) { 1414 printk(KERN_ERR "EXT3-fs: unable to set blocksize\n"); 1415 goto out_fail; 1416 } 1417 1418 /* 1419 * The ext3 superblock will not be buffer aligned for other than 1kB 1420 * block sizes. 
/*
 * ext3_fill_super() - read and validate the on-disk superblock and bring
 * the filesystem up to the point where it has a root dentry.
 * @sb:     VFS superblock to populate
 * @data:   mount option string (also consumed by get_sb_block())
 * @silent: when non-zero, suppress the "can't find ext3" and
 *          "no journal" messages
 *
 * Returns 0 on success, -ENOMEM if the ext3_sb_info cannot be allocated,
 * and -EINVAL for every other failure.
 *
 * The big kernel lock is dropped right after the sb_info allocation and
 * re-taken on every exit path after that point.  The error labels at the
 * bottom unwind in reverse order of setup: failed_mount4 (journal),
 * failed_mount3 (per-cpu counters), failed_mount2 (group descriptor
 * buffers), failed_mount (quota names, journal bdev, superblock buffer)
 * and out_fail (sb_info itself).
 */
static int ext3_fill_super (struct super_block *sb, void *data, int silent)
{
	struct buffer_head * bh;
	struct ext3_super_block *es = NULL;
	struct ext3_sb_info *sbi;
	ext3_fsblk_t block;
	ext3_fsblk_t sb_block = get_sb_block(&data);
	ext3_fsblk_t logic_sb_block;
	unsigned long offset = 0;
	unsigned int journal_inum = 0;
	unsigned long journal_devnum = 0;
	unsigned long def_mount_opts;
	struct inode *root;
	int blocksize;
	int hblock;
	int db_count;
	int i;
	int needs_recovery;
	__le32 features;

	sbi = kzalloc(sizeof(*sbi), GFP_KERNEL);
	if (!sbi)
		return -ENOMEM;
	sb->s_fs_info = sbi;
	sbi->s_mount_opt = 0;
	sbi->s_resuid = EXT3_DEF_RESUID;
	sbi->s_resgid = EXT3_DEF_RESGID;

	unlock_kernel();

	blocksize = sb_min_blocksize(sb, EXT3_MIN_BLOCK_SIZE);
	if (!blocksize) {
		printk(KERN_ERR "EXT3-fs: unable to set blocksize\n");
		goto out_fail;
	}

	/*
	 * The ext3 superblock will not be buffer aligned for other than 1kB
	 * block sizes.  We need to calculate the offset from buffer start.
	 */
	if (blocksize != EXT3_MIN_BLOCK_SIZE) {
		logic_sb_block = (sb_block * EXT3_MIN_BLOCK_SIZE) / blocksize;
		offset = (sb_block * EXT3_MIN_BLOCK_SIZE) % blocksize;
	} else {
		logic_sb_block = sb_block;
	}

	if (!(bh = sb_bread(sb, logic_sb_block))) {
		printk (KERN_ERR "EXT3-fs: unable to read superblock\n");
		goto out_fail;
	}
	/*
	 * Note: s_es must be initialized as soon as possible because
	 *       some ext3 macro-instructions depend on its value
	 */
	es = (struct ext3_super_block *) (((char *)bh->b_data) + offset);
	sbi->s_es = es;
	sb->s_magic = le16_to_cpu(es->s_magic);
	if (sb->s_magic != EXT3_SUPER_MAGIC)
		goto cantfind_ext3;

	/* Set defaults before we parse the mount options */
	def_mount_opts = le32_to_cpu(es->s_default_mount_opts);
	if (def_mount_opts & EXT3_DEFM_DEBUG)
		set_opt(sbi->s_mount_opt, DEBUG);
	if (def_mount_opts & EXT3_DEFM_BSDGROUPS)
		set_opt(sbi->s_mount_opt, GRPID);
	if (def_mount_opts & EXT3_DEFM_UID16)
		set_opt(sbi->s_mount_opt, NO_UID32);
#ifdef CONFIG_EXT3_FS_XATTR
	if (def_mount_opts & EXT3_DEFM_XATTR_USER)
		set_opt(sbi->s_mount_opt, XATTR_USER);
#endif
#ifdef CONFIG_EXT3_FS_POSIX_ACL
	if (def_mount_opts & EXT3_DEFM_ACL)
		set_opt(sbi->s_mount_opt, POSIX_ACL);
#endif
	if ((def_mount_opts & EXT3_DEFM_JMODE) == EXT3_DEFM_JMODE_DATA)
		sbi->s_mount_opt |= EXT3_MOUNT_JOURNAL_DATA;
	else if ((def_mount_opts & EXT3_DEFM_JMODE) == EXT3_DEFM_JMODE_ORDERED)
		sbi->s_mount_opt |= EXT3_MOUNT_ORDERED_DATA;
	else if ((def_mount_opts & EXT3_DEFM_JMODE) == EXT3_DEFM_JMODE_WBACK)
		sbi->s_mount_opt |= EXT3_MOUNT_WRITEBACK_DATA;

	/* Error behaviour recorded in the superblock; anything else: continue */
	if (le16_to_cpu(sbi->s_es->s_errors) == EXT3_ERRORS_PANIC)
		set_opt(sbi->s_mount_opt, ERRORS_PANIC);
	else if (le16_to_cpu(sbi->s_es->s_errors) == EXT3_ERRORS_RO)
		set_opt(sbi->s_mount_opt, ERRORS_RO);
	else
		set_opt(sbi->s_mount_opt, ERRORS_CONT);

	sbi->s_resuid = le16_to_cpu(es->s_def_resuid);
	sbi->s_resgid = le16_to_cpu(es->s_def_resgid);

	set_opt(sbi->s_mount_opt, RESERVATION);

	/* Explicit mount options override the superblock defaults set above */
	if (!parse_options ((char *) data, sb, &journal_inum, &journal_devnum,
			    NULL, 0))
		goto failed_mount;

	sb->s_flags = (sb->s_flags & ~MS_POSIXACL) |
		((sbi->s_mount_opt & EXT3_MOUNT_POSIX_ACL) ? MS_POSIXACL : 0);

	if (le32_to_cpu(es->s_rev_level) == EXT3_GOOD_OLD_REV &&
	    (EXT3_HAS_COMPAT_FEATURE(sb, ~0U) ||
	     EXT3_HAS_RO_COMPAT_FEATURE(sb, ~0U) ||
	     EXT3_HAS_INCOMPAT_FEATURE(sb, ~0U)))
		printk(KERN_WARNING
		       "EXT3-fs warning: feature flags set on rev 0 fs, "
		       "running e2fsck is recommended\n");
	/*
	 * Check feature flags regardless of the revision level, since we
	 * previously didn't change the revision level when setting the flags,
	 * so there is a chance incompat flags are set on a rev 0 filesystem.
	 */
	features = EXT3_HAS_INCOMPAT_FEATURE(sb, ~EXT3_FEATURE_INCOMPAT_SUPP);
	if (features) {
		printk(KERN_ERR "EXT3-fs: %s: couldn't mount because of "
		       "unsupported optional features (%x).\n",
		       sb->s_id, le32_to_cpu(features));
		goto failed_mount;
	}
	/* Unknown RO_COMPAT features only block a read-write mount */
	features = EXT3_HAS_RO_COMPAT_FEATURE(sb, ~EXT3_FEATURE_RO_COMPAT_SUPP);
	if (!(sb->s_flags & MS_RDONLY) && features) {
		printk(KERN_ERR "EXT3-fs: %s: couldn't mount RDWR because of "
		       "unsupported optional features (%x).\n",
		       sb->s_id, le32_to_cpu(features));
		goto failed_mount;
	}
	blocksize = BLOCK_SIZE << le32_to_cpu(es->s_log_block_size);

	if (blocksize < EXT3_MIN_BLOCK_SIZE ||
	    blocksize > EXT3_MAX_BLOCK_SIZE) {
		printk(KERN_ERR
		       "EXT3-fs: Unsupported filesystem blocksize %d on %s.\n",
		       blocksize, sb->s_id);
		goto failed_mount;
	}

	hblock = bdev_hardsect_size(sb->s_bdev);
	if (sb->s_blocksize != blocksize) {
		/*
		 * Make sure the blocksize for the filesystem is larger
		 * than the hardware sectorsize for the machine.
		 */
		if (blocksize < hblock) {
			printk(KERN_ERR "EXT3-fs: blocksize %d too small for "
			       "device blocksize %d.\n", blocksize, hblock);
			goto failed_mount;
		}

		/*
		 * Switch the device to the filesystem's block size and
		 * re-read the superblock; es and sbi->s_es are repointed
		 * into the new buffer.
		 */
		brelse (bh);
		sb_set_blocksize(sb, blocksize);
		logic_sb_block = (sb_block * EXT3_MIN_BLOCK_SIZE) / blocksize;
		offset = (sb_block * EXT3_MIN_BLOCK_SIZE) % blocksize;
		bh = sb_bread(sb, logic_sb_block);
		if (!bh) {
			printk(KERN_ERR
			       "EXT3-fs: Can't read superblock on 2nd try.\n");
			goto failed_mount;
		}
		es = (struct ext3_super_block *)(((char *)bh->b_data) + offset);
		sbi->s_es = es;
		if (es->s_magic != cpu_to_le16(EXT3_SUPER_MAGIC)) {
			printk (KERN_ERR
				"EXT3-fs: Magic mismatch, very weird !\n");
			goto failed_mount;
		}
	}

	sb->s_maxbytes = ext3_max_size(sb->s_blocksize_bits);

	if (le32_to_cpu(es->s_rev_level) == EXT3_GOOD_OLD_REV) {
		sbi->s_inode_size = EXT3_GOOD_OLD_INODE_SIZE;
		sbi->s_first_ino = EXT3_GOOD_OLD_FIRST_INO;
	} else {
		sbi->s_inode_size = le16_to_cpu(es->s_inode_size);
		sbi->s_first_ino = le32_to_cpu(es->s_first_ino);
		/* Must be a power of two between the old size and a block */
		if ((sbi->s_inode_size < EXT3_GOOD_OLD_INODE_SIZE) ||
		    (sbi->s_inode_size & (sbi->s_inode_size - 1)) ||
		    (sbi->s_inode_size > blocksize)) {
			printk (KERN_ERR
				"EXT3-fs: unsupported inode size: %d\n",
				sbi->s_inode_size);
			goto failed_mount;
		}
	}
	sbi->s_frag_size = EXT3_MIN_FRAG_SIZE <<
				   le32_to_cpu(es->s_log_frag_size);
	if (blocksize != sbi->s_frag_size) {
		printk(KERN_ERR
		       "EXT3-fs: fragsize %lu != blocksize %u (unsupported)\n",
		       sbi->s_frag_size, blocksize);
		goto failed_mount;
	}
	sbi->s_frags_per_block = 1;
	sbi->s_blocks_per_group = le32_to_cpu(es->s_blocks_per_group);
	sbi->s_frags_per_group = le32_to_cpu(es->s_frags_per_group);
	sbi->s_inodes_per_group = le32_to_cpu(es->s_inodes_per_group);
	/* Guard the divisions below against a zero divisor */
	if (EXT3_INODE_SIZE(sb) == 0)
		goto cantfind_ext3;
	sbi->s_inodes_per_block = blocksize / EXT3_INODE_SIZE(sb);
	if (sbi->s_inodes_per_block == 0)
		goto cantfind_ext3;
	sbi->s_itb_per_group = sbi->s_inodes_per_group /
					sbi->s_inodes_per_block;
	sbi->s_desc_per_block = blocksize / sizeof(struct ext3_group_desc);
	sbi->s_sbh = bh;
	sbi->s_mount_state = le16_to_cpu(es->s_state);
	sbi->s_addr_per_block_bits = ilog2(EXT3_ADDR_PER_BLOCK(sb));
	sbi->s_desc_per_block_bits = ilog2(EXT3_DESC_PER_BLOCK(sb));
	for (i=0; i < 4; i++)
		sbi->s_hash_seed[i] = le32_to_cpu(es->s_hash_seed[i]);
	sbi->s_def_hash_version = es->s_def_hash_version;

	/* Per-group counts may not exceed the number of bits in one block */
	if (sbi->s_blocks_per_group > blocksize * 8) {
		printk (KERN_ERR
			"EXT3-fs: #blocks per group too big: %lu\n",
			sbi->s_blocks_per_group);
		goto failed_mount;
	}
	if (sbi->s_frags_per_group > blocksize * 8) {
		printk (KERN_ERR
			"EXT3-fs: #fragments per group too big: %lu\n",
			sbi->s_frags_per_group);
		goto failed_mount;
	}
	if (sbi->s_inodes_per_group > blocksize * 8) {
		printk (KERN_ERR
			"EXT3-fs: #inodes per group too big: %lu\n",
			sbi->s_inodes_per_group);
		goto failed_mount;
	}

	/* Refuse filesystems whose size in 512-byte sectors overflows sector_t */
	if (le32_to_cpu(es->s_blocks_count) >
		    (sector_t)(~0ULL) >> (sb->s_blocksize_bits - 9)) {
		printk(KERN_ERR "EXT3-fs: filesystem on %s:"
			" too large to mount safely\n", sb->s_id);
		if (sizeof(sector_t) < 8)
			printk(KERN_WARNING "EXT3-fs: CONFIG_LBD not "
					"enabled\n");
		goto failed_mount;
	}

	if (EXT3_BLOCKS_PER_GROUP(sb) == 0)
		goto cantfind_ext3;
	sbi->s_groups_count = ((le32_to_cpu(es->s_blocks_count) -
			       le32_to_cpu(es->s_first_data_block) - 1)
				       / EXT3_BLOCKS_PER_GROUP(sb)) + 1;
	/* Number of blocks needed to hold all group descriptors (rounded up) */
	db_count = (sbi->s_groups_count + EXT3_DESC_PER_BLOCK(sb) - 1) /
		   EXT3_DESC_PER_BLOCK(sb);
	sbi->s_group_desc = kmalloc(db_count * sizeof (struct buffer_head *),
				    GFP_KERNEL);
	if (sbi->s_group_desc == NULL) {
		printk (KERN_ERR "EXT3-fs: not enough memory\n");
		goto failed_mount;
	}

	bgl_lock_init(&sbi->s_blockgroup_lock);

	for (i = 0; i < db_count; i++) {
		block = descriptor_loc(sb, logic_sb_block, i);
		sbi->s_group_desc[i] = sb_bread(sb, block);
		if (!sbi->s_group_desc[i]) {
			printk (KERN_ERR "EXT3-fs: "
				"can't read group descriptor %d\n", i);
			/* Only the buffers read so far must be released */
			db_count = i;
			goto failed_mount2;
		}
	}
	if (!ext3_check_descriptors (sb)) {
		printk(KERN_ERR "EXT3-fs: group descriptors corrupted!\n");
		goto failed_mount2;
	}
	sbi->s_gdb_count = db_count;
	get_random_bytes(&sbi->s_next_generation, sizeof(u32));
	spin_lock_init(&sbi->s_next_gen_lock);

	percpu_counter_init(&sbi->s_freeblocks_counter,
		ext3_count_free_blocks(sb));
	percpu_counter_init(&sbi->s_freeinodes_counter,
		ext3_count_free_inodes(sb));
	percpu_counter_init(&sbi->s_dirs_counter,
		ext3_count_dirs(sb));

	/* per filesystem reservation list head & lock */
	spin_lock_init(&sbi->s_rsv_window_lock);
	sbi->s_rsv_window_root = RB_ROOT;
	/* Add a single, static dummy reservation to the start of the
	 * reservation window list --- it gives us a placeholder for
	 * append-at-start-of-list which makes the allocation logic
	 * _much_ simpler. */
	sbi->s_rsv_window_head.rsv_start = EXT3_RESERVE_WINDOW_NOT_ALLOCATED;
	sbi->s_rsv_window_head.rsv_end = EXT3_RESERVE_WINDOW_NOT_ALLOCATED;
	sbi->s_rsv_window_head.rsv_alloc_hit = 0;
	sbi->s_rsv_window_head.rsv_goal_size = 0;
	ext3_rsv_window_add(sb, &sbi->s_rsv_window_head);

	/*
	 * set up enough so that it can read an inode
	 */
	sb->s_op = &ext3_sops;
	sb->s_export_op = &ext3_export_ops;
	sb->s_xattr = ext3_xattr_handlers;
#ifdef CONFIG_QUOTA
	sb->s_qcop = &ext3_qctl_operations;
	sb->dq_op = &ext3_quota_operations;
#endif
	INIT_LIST_HEAD(&sbi->s_orphan); /* unlinked but open files */

	sb->s_root = NULL;

	/* Sampled before the journal is loaded; only used for the log line below */
	needs_recovery = (es->s_last_orphan != 0 ||
			  EXT3_HAS_INCOMPAT_FEATURE(sb,
				    EXT3_FEATURE_INCOMPAT_RECOVER));

	/*
	 * The first inode we look at is the journal inode.  Don't try
	 * root first: it may be modified in the journal!
	 */
	if (!test_opt(sb, NOLOAD) &&
	    EXT3_HAS_COMPAT_FEATURE(sb, EXT3_FEATURE_COMPAT_HAS_JOURNAL)) {
		if (ext3_load_journal(sb, es, journal_devnum))
			goto failed_mount3;
	} else if (journal_inum) {
		if (ext3_create_journal(sb, es, journal_inum))
			goto failed_mount3;
	} else {
		if (!silent)
			printk (KERN_ERR
				"ext3: No journal on filesystem on %s\n",
				sb->s_id);
		goto failed_mount3;
	}

	/* We have now updated the journal if required, so we can
	 * validate the data journaling mode. */
	switch (test_opt(sb, DATA_FLAGS)) {
	case 0:
		/* No mode set, assume a default based on the journal
                   capabilities: ORDERED_DATA if the journal can
                   cope, else JOURNAL_DATA */
		if (journal_check_available_features
		    (sbi->s_journal, 0, 0, JFS_FEATURE_INCOMPAT_REVOKE))
			set_opt(sbi->s_mount_opt, ORDERED_DATA);
		else
			set_opt(sbi->s_mount_opt, JOURNAL_DATA);
		break;

	case EXT3_MOUNT_ORDERED_DATA:
	case EXT3_MOUNT_WRITEBACK_DATA:
		if (!journal_check_available_features
		    (sbi->s_journal, 0, 0, JFS_FEATURE_INCOMPAT_REVOKE)) {
			printk(KERN_ERR "EXT3-fs: Journal does not support "
			       "requested data journaling mode\n");
			goto failed_mount4;
		}
		/* fall through */
	default:
		break;
	}

	if (test_opt(sb, NOBH)) {
		if (!(test_opt(sb, DATA_FLAGS) == EXT3_MOUNT_WRITEBACK_DATA)) {
			printk(KERN_WARNING "EXT3-fs: Ignoring nobh option - "
				"its supported only with writeback mode\n");
			clear_opt(sbi->s_mount_opt, NOBH);
		}
	}
	/*
	 * The journal_load will have done any necessary log recovery,
	 * so we can safely mount the rest of the filesystem now.
	 */

	root = iget(sb, EXT3_ROOT_INO);
	sb->s_root = d_alloc_root(root);
	if (!sb->s_root) {
		printk(KERN_ERR "EXT3-fs: get root inode failed\n");
		iput(root);
		goto failed_mount4;
	}
	if (!S_ISDIR(root->i_mode) || !root->i_blocks || !root->i_size) {
		dput(sb->s_root);
		sb->s_root = NULL;
		printk(KERN_ERR "EXT3-fs: corrupt root inode, run e2fsck\n");
		goto failed_mount4;
	}

	/* NOTE(review): the return value (possibly MS_RDONLY) is discarded here */
	ext3_setup_super (sb, es, sb->s_flags & MS_RDONLY);
	/*
	 * akpm: core read_super() calls in here with the superblock locked.
	 * That deadlocks, because orphan cleanup needs to lock the superblock
	 * in numerous places.  Here we just pop the lock - it's relatively
	 * harmless, because we are now ready to accept write_super() requests,
	 * and aviro says that's the only reason for hanging onto the
	 * superblock lock.
	 */
	EXT3_SB(sb)->s_mount_state |= EXT3_ORPHAN_FS;
	ext3_orphan_cleanup(sb, es);
	EXT3_SB(sb)->s_mount_state &= ~EXT3_ORPHAN_FS;
	if (needs_recovery)
		printk (KERN_INFO "EXT3-fs: recovery complete.\n");
	ext3_mark_recovery_complete(sb, es);
	printk (KERN_INFO "EXT3-fs: mounted filesystem with %s data mode.\n",
		test_opt(sb,DATA_FLAGS) == EXT3_MOUNT_JOURNAL_DATA ? "journal":
		test_opt(sb,DATA_FLAGS) == EXT3_MOUNT_ORDERED_DATA ? "ordered":
		"writeback");

	lock_kernel();
	return 0;

cantfind_ext3:
	if (!silent)
		printk(KERN_ERR "VFS: Can't find ext3 filesystem on dev %s.\n",
		       sb->s_id);
	goto failed_mount;

	/* Unwind labels: each one falls through into the next */
failed_mount4:
	journal_destroy(sbi->s_journal);
failed_mount3:
	percpu_counter_destroy(&sbi->s_freeblocks_counter);
	percpu_counter_destroy(&sbi->s_freeinodes_counter);
	percpu_counter_destroy(&sbi->s_dirs_counter);
failed_mount2:
	for (i = 0; i < db_count; i++)
		brelse(sbi->s_group_desc[i]);
	kfree(sbi->s_group_desc);
failed_mount:
#ifdef CONFIG_QUOTA
	for (i = 0; i < MAXQUOTAS; i++)
		kfree(sbi->s_qf_names[i]);
#endif
	ext3_blkdev_remove(sbi);
	brelse(bh);
out_fail:
	sb->s_fs_info = NULL;
	kfree(sbi);
	lock_kernel();
	return -EINVAL;
}
1826 */ 1827static void ext3_init_journal_params(struct super_block *sb, journal_t *journal) 1828{ 1829 struct ext3_sb_info *sbi = EXT3_SB(sb); 1830 1831 if (sbi->s_commit_interval) 1832 journal->j_commit_interval = sbi->s_commit_interval; 1833 /* We could also set up an ext3-specific default for the commit 1834 * interval here, but for now we'll just fall back to the jbd 1835 * default. */ 1836 1837 spin_lock(&journal->j_state_lock); 1838 if (test_opt(sb, BARRIER)) 1839 journal->j_flags |= JFS_BARRIER; 1840 else 1841 journal->j_flags &= ~JFS_BARRIER; 1842 spin_unlock(&journal->j_state_lock); 1843} 1844 1845static journal_t *ext3_get_journal(struct super_block *sb, 1846 unsigned int journal_inum) 1847{ 1848 struct inode *journal_inode; 1849 journal_t *journal; 1850 1851 /* First, test for the existence of a valid inode on disk. Bad 1852 * things happen if we iget() an unused inode, as the subsequent 1853 * iput() will try to delete it. */ 1854 1855 journal_inode = iget(sb, journal_inum); 1856 if (!journal_inode) { 1857 printk(KERN_ERR "EXT3-fs: no journal found.\n"); 1858 return NULL; 1859 } 1860 if (!journal_inode->i_nlink) { 1861 make_bad_inode(journal_inode); 1862 iput(journal_inode); 1863 printk(KERN_ERR "EXT3-fs: journal inode is deleted.\n"); 1864 return NULL; 1865 } 1866 1867 jbd_debug(2, "Journal inode found at %p: %Ld bytes\n", 1868 journal_inode, journal_inode->i_size); 1869 if (is_bad_inode(journal_inode) || !S_ISREG(journal_inode->i_mode)) { 1870 printk(KERN_ERR "EXT3-fs: invalid journal inode.\n"); 1871 iput(journal_inode); 1872 return NULL; 1873 } 1874 1875 journal = journal_init_inode(journal_inode); 1876 if (!journal) { 1877 printk(KERN_ERR "EXT3-fs: Could not load journal inode\n"); 1878 iput(journal_inode); 1879 return NULL; 1880 } 1881 journal->j_private = sb; 1882 ext3_init_journal_params(sb, journal); 1883 return journal; 1884} 1885 1886static journal_t *ext3_get_dev_journal(struct super_block *sb, 1887 dev_t j_dev) 1888{ 1889 struct 
buffer_head * bh; 1890 journal_t *journal; 1891 ext3_fsblk_t start; 1892 ext3_fsblk_t len; 1893 int hblock, blocksize; 1894 ext3_fsblk_t sb_block; 1895 unsigned long offset; 1896 struct ext3_super_block * es; 1897 struct block_device *bdev; 1898 1899 bdev = ext3_blkdev_get(j_dev); 1900 if (bdev == NULL) 1901 return NULL; 1902 1903 if (bd_claim(bdev, sb)) { 1904 printk(KERN_ERR 1905 "EXT3: failed to claim external journal device.\n"); 1906 blkdev_put(bdev); 1907 return NULL; 1908 } 1909 1910 blocksize = sb->s_blocksize; 1911 hblock = bdev_hardsect_size(bdev); 1912 if (blocksize < hblock) { 1913 printk(KERN_ERR 1914 "EXT3-fs: blocksize too small for journal device.\n"); 1915 goto out_bdev; 1916 } 1917 1918 sb_block = EXT3_MIN_BLOCK_SIZE / blocksize; 1919 offset = EXT3_MIN_BLOCK_SIZE % blocksize; 1920 set_blocksize(bdev, blocksize); 1921 if (!(bh = __bread(bdev, sb_block, blocksize))) { 1922 printk(KERN_ERR "EXT3-fs: couldn't read superblock of " 1923 "external journal\n"); 1924 goto out_bdev; 1925 } 1926 1927 es = (struct ext3_super_block *) (((char *)bh->b_data) + offset); 1928 if ((le16_to_cpu(es->s_magic) != EXT3_SUPER_MAGIC) || 1929 !(le32_to_cpu(es->s_feature_incompat) & 1930 EXT3_FEATURE_INCOMPAT_JOURNAL_DEV)) { 1931 printk(KERN_ERR "EXT3-fs: external journal has " 1932 "bad superblock\n"); 1933 brelse(bh); 1934 goto out_bdev; 1935 } 1936 1937 if (memcmp(EXT3_SB(sb)->s_es->s_journal_uuid, es->s_uuid, 16)) { 1938 printk(KERN_ERR "EXT3-fs: journal UUID does not match\n"); 1939 brelse(bh); 1940 goto out_bdev; 1941 } 1942 1943 len = le32_to_cpu(es->s_blocks_count); 1944 start = sb_block + 1; 1945 brelse(bh); /* we're done with the superblock */ 1946 1947 journal = journal_init_dev(bdev, sb->s_bdev, 1948 start, len, blocksize); 1949 if (!journal) { 1950 printk(KERN_ERR "EXT3-fs: failed to create device journal\n"); 1951 goto out_bdev; 1952 } 1953 journal->j_private = sb; 1954 ll_rw_block(READ, 1, &journal->j_sb_buffer); 1955 wait_on_buffer(journal->j_sb_buffer); 
1956 if (!buffer_uptodate(journal->j_sb_buffer)) { 1957 printk(KERN_ERR "EXT3-fs: I/O error on journal device\n"); 1958 goto out_journal; 1959 } 1960 if (be32_to_cpu(journal->j_superblock->s_nr_users) != 1) { 1961 printk(KERN_ERR "EXT3-fs: External journal has more than one " 1962 "user (unsupported) - %d\n", 1963 be32_to_cpu(journal->j_superblock->s_nr_users)); 1964 goto out_journal; 1965 } 1966 EXT3_SB(sb)->journal_bdev = bdev; 1967 ext3_init_journal_params(sb, journal); 1968 return journal; 1969out_journal: 1970 journal_destroy(journal); 1971out_bdev: 1972 ext3_blkdev_put(bdev); 1973 return NULL; 1974} 1975 1976static int ext3_load_journal(struct super_block *sb, 1977 struct ext3_super_block *es, 1978 unsigned long journal_devnum) 1979{ 1980 journal_t *journal; 1981 unsigned int journal_inum = le32_to_cpu(es->s_journal_inum); 1982 dev_t journal_dev; 1983 int err = 0; 1984 int really_read_only; 1985 1986 if (journal_devnum && 1987 journal_devnum != le32_to_cpu(es->s_journal_dev)) { 1988 printk(KERN_INFO "EXT3-fs: external journal device major/minor " 1989 "numbers have changed\n"); 1990 journal_dev = new_decode_dev(journal_devnum); 1991 } else 1992 journal_dev = new_decode_dev(le32_to_cpu(es->s_journal_dev)); 1993 1994 really_read_only = bdev_read_only(sb->s_bdev); 1995 1996 /* 1997 * Are we loading a blank journal or performing recovery after a 1998 * crash? For recovery, we need to check in advance whether we 1999 * can get read-write access to the device. 
2000 */ 2001 2002 if (EXT3_HAS_INCOMPAT_FEATURE(sb, EXT3_FEATURE_INCOMPAT_RECOVER)) { 2003 if (sb->s_flags & MS_RDONLY) { 2004 printk(KERN_INFO "EXT3-fs: INFO: recovery " 2005 "required on readonly filesystem.\n"); 2006 if (really_read_only) { 2007 printk(KERN_ERR "EXT3-fs: write access " 2008 "unavailable, cannot proceed.\n"); 2009 return -EROFS; 2010 } 2011 printk (KERN_INFO "EXT3-fs: write access will " 2012 "be enabled during recovery.\n"); 2013 } 2014 } 2015 2016 if (journal_inum && journal_dev) { 2017 printk(KERN_ERR "EXT3-fs: filesystem has both journal " 2018 "and inode journals!\n"); 2019 return -EINVAL; 2020 } 2021 2022 if (journal_inum) { 2023 if (!(journal = ext3_get_journal(sb, journal_inum))) 2024 return -EINVAL; 2025 } else { 2026 if (!(journal = ext3_get_dev_journal(sb, journal_dev))) 2027 return -EINVAL; 2028 } 2029 2030 if (!really_read_only && test_opt(sb, UPDATE_JOURNAL)) { 2031 err = journal_update_format(journal); 2032 if (err) { 2033 printk(KERN_ERR "EXT3-fs: error updating journal.\n"); 2034 journal_destroy(journal); 2035 return err; 2036 } 2037 } 2038 2039 if (!EXT3_HAS_INCOMPAT_FEATURE(sb, EXT3_FEATURE_INCOMPAT_RECOVER)) 2040 err = journal_wipe(journal, !really_read_only); 2041 if (!err) 2042 err = journal_load(journal); 2043 2044 if (err) { 2045 printk(KERN_ERR "EXT3-fs: error loading journal.\n"); 2046 journal_destroy(journal); 2047 return err; 2048 } 2049 2050 EXT3_SB(sb)->s_journal = journal; 2051 ext3_clear_journal_err(sb, es); 2052 2053 if (journal_devnum && 2054 journal_devnum != le32_to_cpu(es->s_journal_dev)) { 2055 es->s_journal_dev = cpu_to_le32(journal_devnum); 2056 sb->s_dirt = 1; 2057 2058 /* Make sure we flush the recovery flag to disk. 
*/ 2059 ext3_commit_super(sb, es, 1); 2060 } 2061 2062 return 0; 2063} 2064 2065static int ext3_create_journal(struct super_block * sb, 2066 struct ext3_super_block * es, 2067 unsigned int journal_inum) 2068{ 2069 journal_t *journal; 2070 2071 if (sb->s_flags & MS_RDONLY) { 2072 printk(KERN_ERR "EXT3-fs: readonly filesystem when trying to " 2073 "create journal.\n"); 2074 return -EROFS; 2075 } 2076 2077 if (!(journal = ext3_get_journal(sb, journal_inum))) 2078 return -EINVAL; 2079 2080 printk(KERN_INFO "EXT3-fs: creating new journal on inode %u\n", 2081 journal_inum); 2082 2083 if (journal_create(journal)) { 2084 printk(KERN_ERR "EXT3-fs: error creating journal.\n"); 2085 journal_destroy(journal); 2086 return -EIO; 2087 } 2088 2089 EXT3_SB(sb)->s_journal = journal; 2090 2091 ext3_update_dynamic_rev(sb); 2092 EXT3_SET_INCOMPAT_FEATURE(sb, EXT3_FEATURE_INCOMPAT_RECOVER); 2093 EXT3_SET_COMPAT_FEATURE(sb, EXT3_FEATURE_COMPAT_HAS_JOURNAL); 2094 2095 es->s_journal_inum = cpu_to_le32(journal_inum); 2096 sb->s_dirt = 1; 2097 2098 /* Make sure we flush the recovery flag to disk. */ 2099 ext3_commit_super(sb, es, 1); 2100 2101 return 0; 2102} 2103 2104static void ext3_commit_super (struct super_block * sb, 2105 struct ext3_super_block * es, 2106 int sync) 2107{ 2108 struct buffer_head *sbh = EXT3_SB(sb)->s_sbh; 2109 2110 if (!sbh) 2111 return; 2112 es->s_wtime = cpu_to_le32(get_seconds()); 2113 es->s_free_blocks_count = cpu_to_le32(ext3_count_free_blocks(sb)); 2114 es->s_free_inodes_count = cpu_to_le32(ext3_count_free_inodes(sb)); 2115 BUFFER_TRACE(sbh, "marking dirty"); 2116 mark_buffer_dirty(sbh); 2117 if (sync) 2118 sync_dirty_buffer(sbh); 2119} 2120 2121 2122/* 2123 * Have we just finished recovery? If so, and if we are mounting (or 2124 * remounting) the filesystem readonly, then we will end up with a 2125 * consistent fs on disk. Record that fact. 
2126 */ 2127static void ext3_mark_recovery_complete(struct super_block * sb, 2128 struct ext3_super_block * es) 2129{ 2130 journal_t *journal = EXT3_SB(sb)->s_journal; 2131 2132 journal_lock_updates(journal); 2133 journal_flush(journal); 2134 if (EXT3_HAS_INCOMPAT_FEATURE(sb, EXT3_FEATURE_INCOMPAT_RECOVER) && 2135 sb->s_flags & MS_RDONLY) { 2136 EXT3_CLEAR_INCOMPAT_FEATURE(sb, EXT3_FEATURE_INCOMPAT_RECOVER); 2137 sb->s_dirt = 0; 2138 ext3_commit_super(sb, es, 1); 2139 } 2140 journal_unlock_updates(journal); 2141} 2142 2143/* 2144 * If we are mounting (or read-write remounting) a filesystem whose journal 2145 * has recorded an error from a previous lifetime, move that error to the 2146 * main filesystem now. 2147 */ 2148static void ext3_clear_journal_err(struct super_block * sb, 2149 struct ext3_super_block * es) 2150{ 2151 journal_t *journal; 2152 int j_errno; 2153 const char *errstr; 2154 2155 journal = EXT3_SB(sb)->s_journal; 2156 2157 /* 2158 * Now check for any error status which may have been recorded in the 2159 * journal by a prior ext3_error() or ext3_abort() 2160 */ 2161 2162 j_errno = journal_errno(journal); 2163 if (j_errno) { 2164 char nbuf[16]; 2165 2166 errstr = ext3_decode_error(sb, j_errno, nbuf); 2167 ext3_warning(sb, __FUNCTION__, "Filesystem error recorded " 2168 "from previous mount: %s", errstr); 2169 ext3_warning(sb, __FUNCTION__, "Marking fs in need of " 2170 "filesystem check."); 2171 2172 EXT3_SB(sb)->s_mount_state |= EXT3_ERROR_FS; 2173 es->s_state |= cpu_to_le16(EXT3_ERROR_FS); 2174 ext3_commit_super (sb, es, 1); 2175 2176 journal_clear_err(journal); 2177 } 2178} 2179 2180/* 2181 * Force the running and committing transactions to commit, 2182 * and wait on the commit. 
2183 */ 2184int ext3_force_commit(struct super_block *sb) 2185{ 2186 journal_t *journal; 2187 int ret; 2188 2189 if (sb->s_flags & MS_RDONLY) 2190 return 0; 2191 2192 journal = EXT3_SB(sb)->s_journal; 2193 sb->s_dirt = 0; 2194 ret = ext3_journal_force_commit(journal); 2195 return ret; 2196} 2197 2198/* 2199 * Ext3 always journals updates to the superblock itself, so we don't 2200 * have to propagate any other updates to the superblock on disk at this 2201 * point. Just start an async writeback to get the buffers on their way 2202 * to the disk. 2203 * 2204 * This implicitly triggers the writebehind on sync(). 2205 */ 2206 2207static void ext3_write_super (struct super_block * sb) 2208{ 2209 if (mutex_trylock(&sb->s_lock) != 0) 2210 BUG(); 2211 sb->s_dirt = 0; 2212} 2213 2214static int ext3_sync_fs(struct super_block *sb, int wait) 2215{ 2216 tid_t target; 2217 2218 sb->s_dirt = 0; 2219 if (journal_start_commit(EXT3_SB(sb)->s_journal, &target)) { 2220 if (wait) 2221 log_wait_commit(EXT3_SB(sb)->s_journal, target); 2222 } 2223 return 0; 2224} 2225 2226/* 2227 * LVM calls this function before a (read-only) snapshot is created. This 2228 * gives us a chance to flush the journal completely and mark the fs clean. 2229 */ 2230static void ext3_write_super_lockfs(struct super_block *sb) 2231{ 2232 sb->s_dirt = 0; 2233 2234 if (!(sb->s_flags & MS_RDONLY)) { 2235 journal_t *journal = EXT3_SB(sb)->s_journal; 2236 2237 /* Now we set up the journal barrier. */ 2238 journal_lock_updates(journal); 2239 journal_flush(journal); 2240 2241 /* Journal blocked and flushed, clear needs_recovery flag. */ 2242 EXT3_CLEAR_INCOMPAT_FEATURE(sb, EXT3_FEATURE_INCOMPAT_RECOVER); 2243 ext3_commit_super(sb, EXT3_SB(sb)->s_es, 1); 2244 } 2245} 2246 2247/* 2248 * Called by LVM after the snapshot is done. We need to reset the RECOVER 2249 * flag here, even though the filesystem is not technically dirty yet. 
2250 */ 2251static void ext3_unlockfs(struct super_block *sb) 2252{ 2253 if (!(sb->s_flags & MS_RDONLY)) { 2254 lock_super(sb); 2255 /* Reser the needs_recovery flag before the fs is unlocked. */ 2256 EXT3_SET_INCOMPAT_FEATURE(sb, EXT3_FEATURE_INCOMPAT_RECOVER); 2257 ext3_commit_super(sb, EXT3_SB(sb)->s_es, 1); 2258 unlock_super(sb); 2259 journal_unlock_updates(EXT3_SB(sb)->s_journal); 2260 } 2261} 2262 2263static int ext3_remount (struct super_block * sb, int * flags, char * data) 2264{ 2265 struct ext3_super_block * es; 2266 struct ext3_sb_info *sbi = EXT3_SB(sb); 2267 ext3_fsblk_t n_blocks_count = 0; 2268 unsigned long old_sb_flags; 2269 struct ext3_mount_options old_opts; 2270 int err; 2271#ifdef CONFIG_QUOTA 2272 int i; 2273#endif 2274 2275 /* Store the original options */ 2276 old_sb_flags = sb->s_flags; 2277 old_opts.s_mount_opt = sbi->s_mount_opt; 2278 old_opts.s_resuid = sbi->s_resuid; 2279 old_opts.s_resgid = sbi->s_resgid; 2280 old_opts.s_commit_interval = sbi->s_commit_interval; 2281#ifdef CONFIG_QUOTA 2282 old_opts.s_jquota_fmt = sbi->s_jquota_fmt; 2283 for (i = 0; i < MAXQUOTAS; i++) 2284 old_opts.s_qf_names[i] = sbi->s_qf_names[i]; 2285#endif 2286 2287 /* 2288 * Allow the "check" option to be passed as a remount option. 2289 */ 2290 if (!parse_options(data, sb, NULL, NULL, &n_blocks_count, 1)) { 2291 err = -EINVAL; 2292 goto restore_opts; 2293 } 2294 2295 if (sbi->s_mount_opt & EXT3_MOUNT_ABORT) 2296 ext3_abort(sb, __FUNCTION__, "Abort forced by user"); 2297 2298 sb->s_flags = (sb->s_flags & ~MS_POSIXACL) | 2299 ((sbi->s_mount_opt & EXT3_MOUNT_POSIX_ACL) ? 
MS_POSIXACL : 0); 2300 2301 es = sbi->s_es; 2302 2303 ext3_init_journal_params(sb, sbi->s_journal); 2304 2305 if ((*flags & MS_RDONLY) != (sb->s_flags & MS_RDONLY) || 2306 n_blocks_count > le32_to_cpu(es->s_blocks_count)) { 2307 if (sbi->s_mount_opt & EXT3_MOUNT_ABORT) { 2308 err = -EROFS; 2309 goto restore_opts; 2310 } 2311 2312 if (*flags & MS_RDONLY) { 2313 /* 2314 * First of all, the unconditional stuff we have to do 2315 * to disable replay of the journal when we next remount 2316 */ 2317 sb->s_flags |= MS_RDONLY; 2318 2319 /* 2320 * OK, test if we are remounting a valid rw partition 2321 * readonly, and if so set the rdonly flag and then 2322 * mark the partition as valid again. 2323 */ 2324 if (!(es->s_state & cpu_to_le16(EXT3_VALID_FS)) && 2325 (sbi->s_mount_state & EXT3_VALID_FS)) 2326 es->s_state = cpu_to_le16(sbi->s_mount_state); 2327 2328 ext3_mark_recovery_complete(sb, es); 2329 } else { 2330 __le32 ret; 2331 if ((ret = EXT3_HAS_RO_COMPAT_FEATURE(sb, 2332 ~EXT3_FEATURE_RO_COMPAT_SUPP))) { 2333 printk(KERN_WARNING "EXT3-fs: %s: couldn't " 2334 "remount RDWR because of unsupported " 2335 "optional features (%x).\n", 2336 sb->s_id, le32_to_cpu(ret)); 2337 err = -EROFS; 2338 goto restore_opts; 2339 } 2340 2341 /* 2342 * If we have an unprocessed orphan list hanging 2343 * around from a previously readonly bdev mount, 2344 * require a full umount/remount for now. 2345 */ 2346 if (es->s_last_orphan) { 2347 printk(KERN_WARNING "EXT3-fs: %s: couldn't " 2348 "remount RDWR because of unprocessed " 2349 "orphan inode list. Please " 2350 "umount/remount instead.\n", 2351 sb->s_id); 2352 err = -EINVAL; 2353 goto restore_opts; 2354 } 2355 2356 /* 2357 * Mounting a RDONLY partition read-write, so reread 2358 * and store the current valid flag. (It may have 2359 * been changed by e2fsck since we originally mounted 2360 * the partition.) 
2361 */ 2362 ext3_clear_journal_err(sb, es); 2363 sbi->s_mount_state = le16_to_cpu(es->s_state); 2364 if ((err = ext3_group_extend(sb, es, n_blocks_count))) 2365 goto restore_opts; 2366 if (!ext3_setup_super (sb, es, 0)) 2367 sb->s_flags &= ~MS_RDONLY; 2368 } 2369 } 2370#ifdef CONFIG_QUOTA 2371 /* Release old quota file names */ 2372 for (i = 0; i < MAXQUOTAS; i++) 2373 if (old_opts.s_qf_names[i] && 2374 old_opts.s_qf_names[i] != sbi->s_qf_names[i]) 2375 kfree(old_opts.s_qf_names[i]); 2376#endif 2377 return 0; 2378restore_opts: 2379 sb->s_flags = old_sb_flags; 2380 sbi->s_mount_opt = old_opts.s_mount_opt; 2381 sbi->s_resuid = old_opts.s_resuid; 2382 sbi->s_resgid = old_opts.s_resgid; 2383 sbi->s_commit_interval = old_opts.s_commit_interval; 2384#ifdef CONFIG_QUOTA 2385 sbi->s_jquota_fmt = old_opts.s_jquota_fmt; 2386 for (i = 0; i < MAXQUOTAS; i++) { 2387 if (sbi->s_qf_names[i] && 2388 old_opts.s_qf_names[i] != sbi->s_qf_names[i]) 2389 kfree(sbi->s_qf_names[i]); 2390 sbi->s_qf_names[i] = old_opts.s_qf_names[i]; 2391 } 2392#endif 2393 return err; 2394} 2395 2396static int ext3_statfs (struct dentry * dentry, struct kstatfs * buf) 2397{ 2398 struct super_block *sb = dentry->d_sb; 2399 struct ext3_sb_info *sbi = EXT3_SB(sb); 2400 struct ext3_super_block *es = sbi->s_es; 2401 ext3_fsblk_t overhead; 2402 int i; 2403 u64 fsid; 2404 2405 if (test_opt (sb, MINIX_DF)) 2406 overhead = 0; 2407 else { 2408 unsigned long ngroups; 2409 ngroups = EXT3_SB(sb)->s_groups_count; 2410 smp_rmb(); 2411 2412 /* 2413 * Compute the overhead (FS structures) 2414 */ 2415 2416 /* 2417 * All of the blocks before first_data_block are 2418 * overhead 2419 */ 2420 overhead = le32_to_cpu(es->s_first_data_block); 2421 2422 /* 2423 * Add the overhead attributed to the superblock and 2424 * block group descriptors. If the sparse superblocks 2425 * feature is turned on, then not all groups have this. 
2426 */ 2427 for (i = 0; i < ngroups; i++) { 2428 overhead += ext3_bg_has_super(sb, i) + 2429 ext3_bg_num_gdb(sb, i); 2430 cond_resched(); 2431 } 2432 2433 /* 2434 * Every block group has an inode bitmap, a block 2435 * bitmap, and an inode table. 2436 */ 2437 overhead += (ngroups * (2 + EXT3_SB(sb)->s_itb_per_group)); 2438 } 2439 2440 buf->f_type = EXT3_SUPER_MAGIC; 2441 buf->f_bsize = sb->s_blocksize; 2442 buf->f_blocks = le32_to_cpu(es->s_blocks_count) - overhead; 2443 buf->f_bfree = percpu_counter_sum(&sbi->s_freeblocks_counter); 2444 buf->f_bavail = buf->f_bfree - le32_to_cpu(es->s_r_blocks_count); 2445 if (buf->f_bfree < le32_to_cpu(es->s_r_blocks_count)) 2446 buf->f_bavail = 0; 2447 buf->f_files = le32_to_cpu(es->s_inodes_count); 2448 buf->f_ffree = percpu_counter_sum(&sbi->s_freeinodes_counter); 2449 buf->f_namelen = EXT3_NAME_LEN; 2450 fsid = le64_to_cpup((void *)es->s_uuid) ^ 2451 le64_to_cpup((void *)es->s_uuid + sizeof(u64)); 2452 buf->f_fsid.val[0] = fsid & 0xFFFFFFFFUL; 2453 buf->f_fsid.val[1] = (fsid >> 32) & 0xFFFFFFFFUL; 2454 return 0; 2455} 2456 2457/* Helper function for writing quotas on sync - we need to start transaction before quota file 2458 * is locked for write. 
Otherwise the are possible deadlocks: 2459 * Process 1 Process 2 2460 * ext3_create() quota_sync() 2461 * journal_start() write_dquot() 2462 * DQUOT_INIT() down(dqio_mutex) 2463 * down(dqio_mutex) journal_start() 2464 * 2465 */ 2466 2467#ifdef CONFIG_QUOTA 2468 2469static inline struct inode *dquot_to_inode(struct dquot *dquot) 2470{ 2471 return sb_dqopt(dquot->dq_sb)->files[dquot->dq_type]; 2472} 2473 2474static int ext3_dquot_initialize(struct inode *inode, int type) 2475{ 2476 handle_t *handle; 2477 int ret, err; 2478 2479 /* We may create quota structure so we need to reserve enough blocks */ 2480 handle = ext3_journal_start(inode, 2*EXT3_QUOTA_INIT_BLOCKS(inode->i_sb)); 2481 if (IS_ERR(handle)) 2482 return PTR_ERR(handle); 2483 ret = dquot_initialize(inode, type); 2484 err = ext3_journal_stop(handle); 2485 if (!ret) 2486 ret = err; 2487 return ret; 2488} 2489 2490static int ext3_dquot_drop(struct inode *inode) 2491{ 2492 handle_t *handle; 2493 int ret, err; 2494 2495 /* We may delete quota structure so we need to reserve enough blocks */ 2496 handle = ext3_journal_start(inode, 2*EXT3_QUOTA_DEL_BLOCKS(inode->i_sb)); 2497 if (IS_ERR(handle)) 2498 return PTR_ERR(handle); 2499 ret = dquot_drop(inode); 2500 err = ext3_journal_stop(handle); 2501 if (!ret) 2502 ret = err; 2503 return ret; 2504} 2505 2506static int ext3_write_dquot(struct dquot *dquot) 2507{ 2508 int ret, err; 2509 handle_t *handle; 2510 struct inode *inode; 2511 2512 inode = dquot_to_inode(dquot); 2513 handle = ext3_journal_start(inode, 2514 EXT3_QUOTA_TRANS_BLOCKS(dquot->dq_sb)); 2515 if (IS_ERR(handle)) 2516 return PTR_ERR(handle); 2517 ret = dquot_commit(dquot); 2518 err = ext3_journal_stop(handle); 2519 if (!ret) 2520 ret = err; 2521 return ret; 2522} 2523 2524static int ext3_acquire_dquot(struct dquot *dquot) 2525{ 2526 int ret, err; 2527 handle_t *handle; 2528 2529 handle = ext3_journal_start(dquot_to_inode(dquot), 2530 EXT3_QUOTA_INIT_BLOCKS(dquot->dq_sb)); 2531 if (IS_ERR(handle)) 2532 return 
PTR_ERR(handle); 2533 ret = dquot_acquire(dquot); 2534 err = ext3_journal_stop(handle); 2535 if (!ret) 2536 ret = err; 2537 return ret; 2538} 2539 2540static int ext3_release_dquot(struct dquot *dquot) 2541{ 2542 int ret, err; 2543 handle_t *handle; 2544 2545 handle = ext3_journal_start(dquot_to_inode(dquot), 2546 EXT3_QUOTA_DEL_BLOCKS(dquot->dq_sb)); 2547 if (IS_ERR(handle)) 2548 return PTR_ERR(handle); 2549 ret = dquot_release(dquot); 2550 err = ext3_journal_stop(handle); 2551 if (!ret) 2552 ret = err; 2553 return ret; 2554} 2555 2556static int ext3_mark_dquot_dirty(struct dquot *dquot) 2557{ 2558 /* Are we journalling quotas? */ 2559 if (EXT3_SB(dquot->dq_sb)->s_qf_names[USRQUOTA] || 2560 EXT3_SB(dquot->dq_sb)->s_qf_names[GRPQUOTA]) { 2561 dquot_mark_dquot_dirty(dquot); 2562 return ext3_write_dquot(dquot); 2563 } else { 2564 return dquot_mark_dquot_dirty(dquot); 2565 } 2566} 2567 2568static int ext3_write_info(struct super_block *sb, int type) 2569{ 2570 int ret, err; 2571 handle_t *handle; 2572 2573 /* Data block + inode block */ 2574 handle = ext3_journal_start(sb->s_root->d_inode, 2); 2575 if (IS_ERR(handle)) 2576 return PTR_ERR(handle); 2577 ret = dquot_commit_info(sb, type); 2578 err = ext3_journal_stop(handle); 2579 if (!ret) 2580 ret = err; 2581 return ret; 2582} 2583 2584/* 2585 * Turn on quotas during mount time - we need to find 2586 * the quota file and such... 2587 */ 2588static int ext3_quota_on_mount(struct super_block *sb, int type) 2589{ 2590 return vfs_quota_on_mount(sb, EXT3_SB(sb)->s_qf_names[type], 2591 EXT3_SB(sb)->s_jquota_fmt, type); 2592} 2593 2594/* 2595 * Standard function to be called on quota_on 2596 */ 2597static int ext3_quota_on(struct super_block *sb, int type, int format_id, 2598 char *path) 2599{ 2600 int err; 2601 struct nameidata nd; 2602 2603 if (!test_opt(sb, QUOTA)) 2604 return -EINVAL; 2605 /* Not journalling quota? 
*/ 2606 if (!EXT3_SB(sb)->s_qf_names[USRQUOTA] && 2607 !EXT3_SB(sb)->s_qf_names[GRPQUOTA]) 2608 return vfs_quota_on(sb, type, format_id, path); 2609 err = path_lookup(path, LOOKUP_FOLLOW, &nd); 2610 if (err) 2611 return err; 2612 /* Quotafile not on the same filesystem? */ 2613 if (nd.mnt->mnt_sb != sb) { 2614 path_release(&nd); 2615 return -EXDEV; 2616 } 2617 /* Quotafile not of fs root? */ 2618 if (nd.dentry->d_parent->d_inode != sb->s_root->d_inode) 2619 printk(KERN_WARNING 2620 "EXT3-fs: Quota file not on filesystem root. " 2621 "Journalled quota will not work.\n"); 2622 path_release(&nd); 2623 return vfs_quota_on(sb, type, format_id, path); 2624} 2625 2626/* Read data from quotafile - avoid pagecache and such because we cannot afford 2627 * acquiring the locks... As quota files are never truncated and quota code 2628 * itself serializes the operations (and noone else should touch the files) 2629 * we don't have to be afraid of races */ 2630static ssize_t ext3_quota_read(struct super_block *sb, int type, char *data, 2631 size_t len, loff_t off) 2632{ 2633 struct inode *inode = sb_dqopt(sb)->files[type]; 2634 sector_t blk = off >> EXT3_BLOCK_SIZE_BITS(sb); 2635 int err = 0; 2636 int offset = off & (sb->s_blocksize - 1); 2637 int tocopy; 2638 size_t toread; 2639 struct buffer_head *bh; 2640 loff_t i_size = i_size_read(inode); 2641 2642 if (off > i_size) 2643 return 0; 2644 if (off+len > i_size) 2645 len = i_size-off; 2646 toread = len; 2647 while (toread > 0) { 2648 tocopy = sb->s_blocksize - offset < toread ? 2649 sb->s_blocksize - offset : toread; 2650 bh = ext3_bread(NULL, inode, blk, 0, &err); 2651 if (err) 2652 return err; 2653 if (!bh) /* A hole? 
*/ 2654 memset(data, 0, tocopy); 2655 else 2656 memcpy(data, bh->b_data+offset, tocopy); 2657 brelse(bh); 2658 offset = 0; 2659 toread -= tocopy; 2660 data += tocopy; 2661 blk++; 2662 } 2663 return len; 2664} 2665 2666/* Write to quotafile (we know the transaction is already started and has 2667 * enough credits) */ 2668static ssize_t ext3_quota_write(struct super_block *sb, int type, 2669 const char *data, size_t len, loff_t off) 2670{ 2671 struct inode *inode = sb_dqopt(sb)->files[type]; 2672 sector_t blk = off >> EXT3_BLOCK_SIZE_BITS(sb); 2673 int err = 0; 2674 int offset = off & (sb->s_blocksize - 1); 2675 int tocopy; 2676 int journal_quota = EXT3_SB(sb)->s_qf_names[type] != NULL; 2677 size_t towrite = len; 2678 struct buffer_head *bh; 2679 handle_t *handle = journal_current_handle(); 2680 2681 mutex_lock_nested(&inode->i_mutex, I_MUTEX_QUOTA); 2682 while (towrite > 0) { 2683 tocopy = sb->s_blocksize - offset < towrite ? 2684 sb->s_blocksize - offset : towrite; 2685 bh = ext3_bread(handle, inode, blk, 1, &err); 2686 if (!bh) 2687 goto out; 2688 if (journal_quota) { 2689 err = ext3_journal_get_write_access(handle, bh); 2690 if (err) { 2691 brelse(bh); 2692 goto out; 2693 } 2694 } 2695 lock_buffer(bh); 2696 memcpy(bh->b_data+offset, data, tocopy); 2697 flush_dcache_page(bh->b_page); 2698 unlock_buffer(bh); 2699 if (journal_quota) 2700 err = ext3_journal_dirty_metadata(handle, bh); 2701 else { 2702 /* Always do at least ordered writes for quotas */ 2703 err = ext3_journal_dirty_data(handle, bh); 2704 mark_buffer_dirty(bh); 2705 } 2706 brelse(bh); 2707 if (err) 2708 goto out; 2709 offset = 0; 2710 towrite -= tocopy; 2711 data += tocopy; 2712 blk++; 2713 } 2714out: 2715 if (len == towrite) 2716 return err; 2717 if (inode->i_size < off+len-towrite) { 2718 i_size_write(inode, off+len-towrite); 2719 EXT3_I(inode)->i_disksize = inode->i_size; 2720 } 2721 inode->i_version++; 2722 inode->i_mtime = inode->i_ctime = CURRENT_TIME; 2723 ext3_mark_inode_dirty(handle, inode); 
2724 mutex_unlock(&inode->i_mutex); 2725 return len - towrite; 2726} 2727 2728#endif 2729 2730static int ext3_get_sb(struct file_system_type *fs_type, 2731 int flags, const char *dev_name, void *data, struct vfsmount *mnt) 2732{ 2733 return get_sb_bdev(fs_type, flags, dev_name, data, ext3_fill_super, mnt); 2734} 2735 2736static struct file_system_type ext3_fs_type = { 2737 .owner = THIS_MODULE, 2738 .name = "ext3", 2739 .get_sb = ext3_get_sb, 2740 .kill_sb = kill_block_super, 2741 .fs_flags = FS_REQUIRES_DEV, 2742}; 2743 2744static int __init init_ext3_fs(void) 2745{ 2746 int err = init_ext3_xattr(); 2747 if (err) 2748 return err; 2749 err = init_inodecache(); 2750 if (err) 2751 goto out1; 2752 err = register_filesystem(&ext3_fs_type); 2753 if (err) 2754 goto out; 2755 return 0; 2756out: 2757 destroy_inodecache(); 2758out1: 2759 exit_ext3_xattr(); 2760 return err; 2761} 2762 2763static void __exit exit_ext3_fs(void) 2764{ 2765 unregister_filesystem(&ext3_fs_type); 2766 destroy_inodecache(); 2767 exit_ext3_xattr(); 2768} 2769 2770MODULE_AUTHOR("Remy Card, Stephen Tweedie, Andrew Morton, Andreas Dilger, Theodore Ts'o and others"); 2771MODULE_DESCRIPTION("Second Extended Filesystem with journaling extensions"); 2772MODULE_LICENSE("GPL"); 2773module_init(init_ext3_fs) 2774module_exit(exit_ext3_fs) 2775