1/* 2 * linux/fs/ext3/super.c 3 * 4 * Copyright (C) 1992, 1993, 1994, 1995 5 * Remy Card (card@masi.ibp.fr) 6 * Laboratoire MASI - Institut Blaise Pascal 7 * Universite Pierre et Marie Curie (Paris VI) 8 * 9 * from 10 * 11 * linux/fs/minix/inode.c 12 * 13 * Copyright (C) 1991, 1992 Linus Torvalds 14 * 15 * Big-endian to little-endian byte-swapping/bitmaps by 16 * David S. Miller (davem@caip.rutgers.edu), 1995 17 */ 18 19#include <linux/module.h> 20#include <linux/string.h> 21#include <linux/fs.h> 22#include <linux/time.h> 23#include <linux/jbd.h> 24#include <linux/ext3_fs.h> 25#include <linux/ext3_jbd.h> 26#include <linux/slab.h> 27#include <linux/init.h> 28#include <linux/blkdev.h> 29#include <linux/parser.h> 30#include <linux/smp_lock.h> 31#include <linux/buffer_head.h> 32#include <linux/exportfs.h> 33#include <linux/vfs.h> 34#include <linux/random.h> 35#include <linux/mount.h> 36#include <linux/namei.h> 37#include <linux/quotaops.h> 38#include <linux/seq_file.h> 39#include <linux/log2.h> 40 41#include <asm/uaccess.h> 42 43#include "xattr.h" 44#include "acl.h" 45#include "namei.h" 46 47#ifdef CONFIG_EXT3_DEFAULTS_TO_ORDERED 48 #define EXT3_MOUNT_DEFAULT_DATA_MODE EXT3_MOUNT_ORDERED_DATA 49#else 50 #define EXT3_MOUNT_DEFAULT_DATA_MODE EXT3_MOUNT_WRITEBACK_DATA 51#endif 52 53static int ext3_load_journal(struct super_block *, struct ext3_super_block *, 54 unsigned long journal_devnum); 55static int ext3_create_journal(struct super_block *, struct ext3_super_block *, 56 unsigned int); 57static int ext3_commit_super(struct super_block *sb, 58 struct ext3_super_block *es, 59 int sync); 60static void ext3_mark_recovery_complete(struct super_block * sb, 61 struct ext3_super_block * es); 62static void ext3_clear_journal_err(struct super_block * sb, 63 struct ext3_super_block * es); 64static int ext3_sync_fs(struct super_block *sb, int wait); 65static const char *ext3_decode_error(struct super_block * sb, int errno, 66 char nbuf[16]); 67static int ext3_remount (struct super_block * sb, int * flags, char * data); 68static int ext3_statfs (struct dentry * dentry, struct kstatfs * buf); 69static int ext3_unfreeze(struct super_block *sb); 70static int ext3_freeze(struct super_block *sb); 71 72/* 73 * Wrappers for journal_start/end. 74 * 75 * The only special thing we need to do here is to make sure that all 76 * journal_end calls result in the superblock being marked dirty, so 77 * that sync() will call the filesystem's write_super callback if 78 * appropriate. 79 */ 80handle_t *ext3_journal_start_sb(struct super_block *sb, int nblocks) 81{ 82 journal_t *journal; 83 84 if (sb->s_flags & MS_RDONLY) 85 return ERR_PTR(-EROFS); 86 87 /* Special case here: if the journal has aborted behind our 88 * backs (eg. EIO in the commit thread), then we still need to 89 * take the FS itself readonly cleanly. */ 90 journal = EXT3_SB(sb)->s_journal; 91 if (is_journal_aborted(journal)) { 92 ext3_abort(sb, __func__, 93 "Detected aborted journal"); 94 return ERR_PTR(-EROFS); 95 } 96 97 return journal_start(journal, nblocks); 98} 99 100/* 101 * The only special thing we need to do here is to make sure that all 102 * journal_stop calls result in the superblock being marked dirty, so 103 * that sync() will call the filesystem's write_super callback if 104 * appropriate. 105 */ 106int __ext3_journal_stop(const char *where, handle_t *handle) 107{ 108 struct super_block *sb; 109 int err; 110 int rc; 111 112 sb = handle->h_transaction->t_journal->j_private; 113 err = handle->h_err; 114 rc = journal_stop(handle); 115 116 if (!err) 117 err = rc; 118 if (err) 119 __ext3_std_error(sb, where, err); 120 return err; 121} 122 123void ext3_journal_abort_handle(const char *caller, const char *err_fn, 124 struct buffer_head *bh, handle_t *handle, int err) 125{ 126 char nbuf[16]; 127 const char *errstr = ext3_decode_error(NULL, err, nbuf); 128 129 if (bh) 130 BUFFER_TRACE(bh, "abort"); 131 132 if (!handle->h_err) 133 handle->h_err = err; 134 135 if (is_handle_aborted(handle)) 136 return; 137 138 printk(KERN_ERR "EXT3-fs: %s: aborting transaction: %s in %s\n", 139 caller, errstr, err_fn); 140 141 journal_abort_handle(handle); 142} 143 144void ext3_msg(struct super_block *sb, const char *prefix, 145 const char *fmt, ...) 146{ 147 va_list args; 148 149 va_start(args, fmt); 150 printk("%sEXT3-fs (%s): ", prefix, sb->s_id); 151 vprintk(fmt, args); 152 printk("\n"); 153 va_end(args); 154} 155 156/* Deal with the reporting of failure conditions on a filesystem such as 157 * inconsistencies detected or read IO failures. 158 * 159 * On ext2, we can store the error state of the filesystem in the 160 * superblock. That is not possible on ext3, because we may have other 161 * write ordering constraints on the superblock which prevent us from 162 * writing it out straight away; and given that the journal is about to 163 * be aborted, we can't rely on the current, or future, transactions to 164 * write out the superblock safely. 165 * 166 * We'll just use the journal_abort() error code to record an error in 167 * the journal instead. On recovery, the journal will complain about 168 * that error until we've noted it down and cleared it. 169 */ 170 171static void ext3_handle_error(struct super_block *sb) 172{ 173 struct ext3_super_block *es = EXT3_SB(sb)->s_es; 174 175 EXT3_SB(sb)->s_mount_state |= EXT3_ERROR_FS; 176 es->s_state |= cpu_to_le16(EXT3_ERROR_FS); 177 178 if (sb->s_flags & MS_RDONLY) 179 return; 180 181 if (!test_opt (sb, ERRORS_CONT)) { 182 journal_t *journal = EXT3_SB(sb)->s_journal; 183 184 set_opt(EXT3_SB(sb)->s_mount_opt, ABORT); 185 if (journal) 186 journal_abort(journal, -EIO); 187 } 188 if (test_opt (sb, ERRORS_RO)) { 189 ext3_msg(sb, KERN_CRIT, 190 "error: remounting filesystem read-only"); 191 sb->s_flags |= MS_RDONLY; 192 } 193 ext3_commit_super(sb, es, 1); 194 if (test_opt(sb, ERRORS_PANIC)) 195 panic("EXT3-fs (%s): panic forced after error\n", 196 sb->s_id); 197} 198 199void ext3_error (struct super_block * sb, const char * function, 200 const char * fmt, ...) 201{ 202 va_list args; 203 204 va_start(args, fmt); 205 printk(KERN_CRIT "EXT3-fs error (device %s): %s: ",sb->s_id, function); 206 vprintk(fmt, args); 207 printk("\n"); 208 va_end(args); 209 210 ext3_handle_error(sb); 211} 212 213static const char *ext3_decode_error(struct super_block * sb, int errno, 214 char nbuf[16]) 215{ 216 char *errstr = NULL; 217 218 switch (errno) { 219 case -EIO: 220 errstr = "IO failure"; 221 break; 222 case -ENOMEM: 223 errstr = "Out of memory"; 224 break; 225 case -EROFS: 226 if (!sb || EXT3_SB(sb)->s_journal->j_flags & JFS_ABORT) 227 errstr = "Journal has aborted"; 228 else 229 errstr = "Readonly filesystem"; 230 break; 231 default: 232 /* If the caller passed in an extra buffer for unknown 233 * errors, textualise them now. Else we just return 234 * NULL. */ 235 if (nbuf) { 236 /* Check for truncated error codes... */ 237 if (snprintf(nbuf, 16, "error %d", -errno) >= 0) 238 errstr = nbuf; 239 } 240 break; 241 } 242 243 return errstr; 244} 245 246/* __ext3_std_error decodes expected errors from journaling functions 247 * automatically and invokes the appropriate error response. */ 248 249void __ext3_std_error (struct super_block * sb, const char * function, 250 int errno) 251{ 252 char nbuf[16]; 253 const char *errstr; 254 255 /* Special case: if the error is EROFS, and we're not already 256 * inside a transaction, then there's really no point in logging 257 * an error. */ 258 if (errno == -EROFS && journal_current_handle() == NULL && 259 (sb->s_flags & MS_RDONLY)) 260 return; 261 262 errstr = ext3_decode_error(sb, errno, nbuf); 263 ext3_msg(sb, KERN_CRIT, "error in %s: %s", function, errstr); 264 265 ext3_handle_error(sb); 266} 267 268/* 269 * ext3_abort is a much stronger failure handler than ext3_error. The 270 * abort function may be used to deal with unrecoverable failures such 271 * as journal IO errors or ENOMEM at a critical moment in log management. 272 * 273 * We unconditionally force the filesystem into an ABORT|READONLY state, 274 * unless the error response on the fs has been set to panic in which 275 * case we take the easy way out and panic immediately. 276 */ 277 278void ext3_abort (struct super_block * sb, const char * function, 279 const char * fmt, ...) 280{ 281 va_list args; 282 283 va_start(args, fmt); 284 printk(KERN_CRIT "EXT3-fs (%s): error: %s: ", sb->s_id, function); 285 vprintk(fmt, args); 286 printk("\n"); 287 va_end(args); 288 289 if (test_opt(sb, ERRORS_PANIC)) 290 panic("EXT3-fs: panic from previous error\n"); 291 292 if (sb->s_flags & MS_RDONLY) 293 return; 294 295 ext3_msg(sb, KERN_CRIT, 296 "error: remounting filesystem read-only"); 297 EXT3_SB(sb)->s_mount_state |= EXT3_ERROR_FS; 298 sb->s_flags |= MS_RDONLY; 299 set_opt(EXT3_SB(sb)->s_mount_opt, ABORT); 300 if (EXT3_SB(sb)->s_journal) 301 journal_abort(EXT3_SB(sb)->s_journal, -EIO); 302} 303 304void ext3_warning (struct super_block * sb, const char * function, 305 const char * fmt, ...) 306{ 307 va_list args; 308 309 va_start(args, fmt); 310 printk(KERN_WARNING "EXT3-fs (%s): warning: %s: ", 311 sb->s_id, function); 312 vprintk(fmt, args); 313 printk("\n"); 314 va_end(args); 315} 316 317void ext3_update_dynamic_rev(struct super_block *sb) 318{ 319 struct ext3_super_block *es = EXT3_SB(sb)->s_es; 320 321 if (le32_to_cpu(es->s_rev_level) > EXT3_GOOD_OLD_REV) 322 return; 323 324 ext3_msg(sb, KERN_WARNING, 325 "warning: updating to rev %d because of " 326 "new feature flag, running e2fsck is recommended", 327 EXT3_DYNAMIC_REV); 328 329 es->s_first_ino = cpu_to_le32(EXT3_GOOD_OLD_FIRST_INO); 330 es->s_inode_size = cpu_to_le16(EXT3_GOOD_OLD_INODE_SIZE); 331 es->s_rev_level = cpu_to_le32(EXT3_DYNAMIC_REV); 332 /* leave es->s_feature_*compat flags alone */ 333 /* es->s_uuid will be set by e2fsck if empty */ 334 335 /* 336 * The rest of the superblock fields should be zero, and if not it 337 * means they are likely already in use, so leave them alone. We 338 * can leave it up to e2fsck to clean up any inconsistencies there. 339 */ 340} 341 342/* 343 * Open the external journal device 344 */ 345static struct block_device *ext3_blkdev_get(dev_t dev, struct super_block *sb) 346{ 347 struct block_device *bdev; 348 char b[BDEVNAME_SIZE]; 349 350 bdev = open_by_devnum(dev, FMODE_READ|FMODE_WRITE); 351 if (IS_ERR(bdev)) 352 goto fail; 353 return bdev; 354 355fail: 356 ext3_msg(sb, "error: failed to open journal device %s: %ld", 357 __bdevname(dev, b), PTR_ERR(bdev)); 358 359 return NULL; 360} 361 362/* 363 * Release the journal device 364 */ 365static int ext3_blkdev_put(struct block_device *bdev) 366{ 367 bd_release(bdev); 368 return blkdev_put(bdev, FMODE_READ|FMODE_WRITE); 369} 370 371static int ext3_blkdev_remove(struct ext3_sb_info *sbi) 372{ 373 struct block_device *bdev; 374 int ret = -ENODEV; 375 376 bdev = sbi->journal_bdev; 377 if (bdev) { 378 ret = ext3_blkdev_put(bdev); 379 sbi->journal_bdev = NULL; 380 } 381 return ret; 382} 383 384static inline struct inode *orphan_list_entry(struct list_head *l) 385{ 386 return &list_entry(l, struct ext3_inode_info, i_orphan)->vfs_inode; 387} 388 389static void dump_orphan_list(struct super_block *sb, struct ext3_sb_info *sbi) 390{ 391 struct list_head *l; 392 393 ext3_msg(sb, KERN_ERR, "error: sb orphan head is %d", 394 le32_to_cpu(sbi->s_es->s_last_orphan)); 395 396 ext3_msg(sb, KERN_ERR, "sb_info orphan list:"); 397 list_for_each(l, &sbi->s_orphan) { 398 struct inode *inode = orphan_list_entry(l); 399 ext3_msg(sb, KERN_ERR, " " 400 "inode %s:%lu at %p: mode %o, nlink %d, next %d\n", 401 inode->i_sb->s_id, inode->i_ino, inode, 402 inode->i_mode, inode->i_nlink, 403 NEXT_ORPHAN(inode)); 404 } 405} 406 407static void ext3_put_super (struct super_block * sb) 408{ 409 struct ext3_sb_info *sbi = EXT3_SB(sb); 410 struct ext3_super_block *es = sbi->s_es; 411 int i, err; 412 413 dquot_disable(sb, -1, DQUOT_USAGE_ENABLED | DQUOT_LIMITS_ENABLED); 414 415 lock_kernel(); 416 417 ext3_xattr_put_super(sb); 418 err = journal_destroy(sbi->s_journal); 419 sbi->s_journal = NULL; 420 if (err < 0) 421 ext3_abort(sb, __func__, "Couldn't clean up the journal"); 422 423 if (!(sb->s_flags & MS_RDONLY)) { 424 EXT3_CLEAR_INCOMPAT_FEATURE(sb, EXT3_FEATURE_INCOMPAT_RECOVER); 425 es->s_state = cpu_to_le16(sbi->s_mount_state); 426 BUFFER_TRACE(sbi->s_sbh, "marking dirty"); 427 mark_buffer_dirty(sbi->s_sbh); 428 ext3_commit_super(sb, es, 1); 429 } 430 431 for (i = 0; i < sbi->s_gdb_count; i++) 432 brelse(sbi->s_group_desc[i]); 433 kfree(sbi->s_group_desc); 434 percpu_counter_destroy(&sbi->s_freeblocks_counter); 435 percpu_counter_destroy(&sbi->s_freeinodes_counter); 436 percpu_counter_destroy(&sbi->s_dirs_counter); 437 brelse(sbi->s_sbh); 438#ifdef CONFIG_QUOTA 439 for (i = 0; i < MAXQUOTAS; i++) 440 kfree(sbi->s_qf_names[i]); 441#endif 442 443 /* Debugging code just in case the in-memory inode orphan list 444 * isn't empty. The on-disk one can be non-empty if we've 445 * detected an error and taken the fs readonly, but the 446 * in-memory list had better be clean by this point. */ 447 if (!list_empty(&sbi->s_orphan)) 448 dump_orphan_list(sb, sbi); 449 J_ASSERT(list_empty(&sbi->s_orphan)); 450 451 invalidate_bdev(sb->s_bdev); 452 if (sbi->journal_bdev && sbi->journal_bdev != sb->s_bdev) { 453 /* 454 * Invalidate the journal device's buffers. We don't want them 455 * floating about in memory - the physical journal device may 456 * hotswapped, and it breaks the `ro-after' testing code. 457 */ 458 sync_blockdev(sbi->journal_bdev); 459 invalidate_bdev(sbi->journal_bdev); 460 ext3_blkdev_remove(sbi); 461 } 462 sb->s_fs_info = NULL; 463 kfree(sbi->s_blockgroup_lock); 464 kfree(sbi); 465 466 unlock_kernel(); 467} 468 469static struct kmem_cache *ext3_inode_cachep; 470 471/* 472 * Called inside transaction, so use GFP_NOFS 473 */ 474static struct inode *ext3_alloc_inode(struct super_block *sb) 475{ 476 struct ext3_inode_info *ei; 477 478 ei = kmem_cache_alloc(ext3_inode_cachep, GFP_NOFS); 479 if (!ei) 480 return NULL; 481 ei->i_block_alloc_info = NULL; 482 ei->vfs_inode.i_version = 1; 483 atomic_set(&ei->i_datasync_tid, 0); 484 atomic_set(&ei->i_sync_tid, 0); 485 return &ei->vfs_inode; 486} 487 488static void ext3_destroy_inode(struct inode *inode) 489{ 490 if (!list_empty(&(EXT3_I(inode)->i_orphan))) { 491 printk("EXT3 Inode %p: orphan list check failed!\n", 492 EXT3_I(inode)); 493 print_hex_dump(KERN_INFO, "", DUMP_PREFIX_ADDRESS, 16, 4, 494 EXT3_I(inode), sizeof(struct ext3_inode_info), 495 false); 496 dump_stack(); 497 } 498 kmem_cache_free(ext3_inode_cachep, EXT3_I(inode)); 499} 500 501static void init_once(void *foo) 502{ 503 struct ext3_inode_info *ei = (struct ext3_inode_info *) foo; 504 505 INIT_LIST_HEAD(&ei->i_orphan); 506#ifdef CONFIG_EXT3_FS_XATTR 507 init_rwsem(&ei->xattr_sem); 508#endif 509 mutex_init(&ei->truncate_mutex); 510 inode_init_once(&ei->vfs_inode); 511} 512 513static int init_inodecache(void) 514{ 515 ext3_inode_cachep = kmem_cache_create("ext3_inode_cache", 516 sizeof(struct ext3_inode_info), 517 0, (SLAB_RECLAIM_ACCOUNT| 518 SLAB_MEM_SPREAD), 519 init_once); 520 if (ext3_inode_cachep == NULL) 521 return -ENOMEM; 522 return 0; 523} 524 525static void destroy_inodecache(void) 526{ 527 kmem_cache_destroy(ext3_inode_cachep); 528} 529 530static inline void ext3_show_quota_options(struct seq_file *seq, struct super_block *sb) 531{ 532#if defined(CONFIG_QUOTA) 533 struct ext3_sb_info *sbi = EXT3_SB(sb); 534 535 if (sbi->s_jquota_fmt) { 536 char *fmtname = ""; 537 538 switch (sbi->s_jquota_fmt) { 539 case QFMT_VFS_OLD: 540 fmtname = "vfsold"; 541 break; 542 case QFMT_VFS_V0: 543 fmtname = "vfsv0"; 544 break; 545 case QFMT_VFS_V1: 546 fmtname = "vfsv1"; 547 break; 548 } 549 seq_printf(seq, ",jqfmt=%s", fmtname); 550 } 551 552 if (sbi->s_qf_names[USRQUOTA]) 553 seq_printf(seq, ",usrjquota=%s", sbi->s_qf_names[USRQUOTA]); 554 555 if (sbi->s_qf_names[GRPQUOTA]) 556 seq_printf(seq, ",grpjquota=%s", sbi->s_qf_names[GRPQUOTA]); 557 558 if (test_opt(sb, USRQUOTA)) 559 seq_puts(seq, ",usrquota"); 560 561 if (test_opt(sb, GRPQUOTA)) 562 seq_puts(seq, ",grpquota"); 563#endif 564} 565 566static char *data_mode_string(unsigned long mode) 567{ 568 switch (mode) { 569 case EXT3_MOUNT_JOURNAL_DATA: 570 return "journal"; 571 case EXT3_MOUNT_ORDERED_DATA: 572 return "ordered"; 573 case EXT3_MOUNT_WRITEBACK_DATA: 574 return "writeback"; 575 } 576 return "unknown"; 577} 578 579/* 580 * Show an option if 581 * - it's set to a non-default value OR 582 * - if the per-sb default is different from the global default 583 */ 584static int ext3_show_options(struct seq_file *seq, struct vfsmount *vfs) 585{ 586 struct super_block *sb = vfs->mnt_sb; 587 struct ext3_sb_info *sbi = EXT3_SB(sb); 588 struct ext3_super_block *es = sbi->s_es; 589 unsigned long def_mount_opts; 590 591 def_mount_opts = le32_to_cpu(es->s_default_mount_opts); 592 593 if (sbi->s_sb_block != 1) 594 seq_printf(seq, ",sb=%lu", sbi->s_sb_block); 595 if (test_opt(sb, MINIX_DF)) 596 seq_puts(seq, ",minixdf"); 597 if (test_opt(sb, GRPID)) 598 seq_puts(seq, ",grpid"); 599 if (!test_opt(sb, GRPID) && (def_mount_opts & EXT3_DEFM_BSDGROUPS)) 600 seq_puts(seq, ",nogrpid"); 601 if (sbi->s_resuid != EXT3_DEF_RESUID || 602 le16_to_cpu(es->s_def_resuid) != EXT3_DEF_RESUID) { 603 seq_printf(seq, ",resuid=%u", sbi->s_resuid); 604 } 605 if (sbi->s_resgid != EXT3_DEF_RESGID || 606 le16_to_cpu(es->s_def_resgid) != EXT3_DEF_RESGID) { 607 seq_printf(seq, ",resgid=%u", sbi->s_resgid); 608 } 609 if (test_opt(sb, ERRORS_RO)) { 610 int def_errors = le16_to_cpu(es->s_errors); 611 612 if (def_errors == EXT3_ERRORS_PANIC || 613 def_errors == EXT3_ERRORS_CONTINUE) { 614 seq_puts(seq, ",errors=remount-ro"); 615 } 616 } 617 if (test_opt(sb, ERRORS_CONT)) 618 seq_puts(seq, ",errors=continue"); 619 if (test_opt(sb, ERRORS_PANIC)) 620 seq_puts(seq, ",errors=panic"); 621 if (test_opt(sb, NO_UID32)) 622 seq_puts(seq, ",nouid32"); 623 if (test_opt(sb, DEBUG)) 624 seq_puts(seq, ",debug"); 625 if (test_opt(sb, OLDALLOC)) 626 seq_puts(seq, ",oldalloc"); 627#ifdef CONFIG_EXT3_FS_XATTR 628 if (test_opt(sb, XATTR_USER)) 629 seq_puts(seq, ",user_xattr"); 630 if (!test_opt(sb, XATTR_USER) && 631 (def_mount_opts & EXT3_DEFM_XATTR_USER)) { 632 seq_puts(seq, ",nouser_xattr"); 633 } 634#endif 635#ifdef CONFIG_EXT3_FS_POSIX_ACL 636 if (test_opt(sb, POSIX_ACL)) 637 seq_puts(seq, ",acl"); 638 if (!test_opt(sb, POSIX_ACL) && (def_mount_opts & EXT3_DEFM_ACL)) 639 seq_puts(seq, ",noacl"); 640#endif 641 if (!test_opt(sb, RESERVATION)) 642 seq_puts(seq, ",noreservation"); 643 if (sbi->s_commit_interval) { 644 seq_printf(seq, ",commit=%u", 645 (unsigned) (sbi->s_commit_interval / HZ)); 646 } 647 648 /* 649 * Always display barrier state so it's clear what the status is. 650 */ 651 seq_puts(seq, ",barrier="); 652 seq_puts(seq, test_opt(sb, BARRIER) ? "1" : "0"); 653 seq_printf(seq, ",data=%s", data_mode_string(test_opt(sb, DATA_FLAGS))); 654 if (test_opt(sb, DATA_ERR_ABORT)) 655 seq_puts(seq, ",data_err=abort"); 656 657 if (test_opt(sb, NOLOAD)) 658 seq_puts(seq, ",norecovery"); 659 660 ext3_show_quota_options(seq, sb); 661 662 return 0; 663} 664 665 666static struct inode *ext3_nfs_get_inode(struct super_block *sb, 667 u64 ino, u32 generation) 668{ 669 struct inode *inode; 670 671 if (ino < EXT3_FIRST_INO(sb) && ino != EXT3_ROOT_INO) 672 return ERR_PTR(-ESTALE); 673 if (ino > le32_to_cpu(EXT3_SB(sb)->s_es->s_inodes_count)) 674 return ERR_PTR(-ESTALE); 675 676 /* iget isn't really right if the inode is currently unallocated!! 677 * 678 * ext3_read_inode will return a bad_inode if the inode had been 679 * deleted, so we should be safe. 680 * 681 * Currently we don't know the generation for parent directory, so 682 * a generation of 0 means "accept any" 683 */ 684 inode = ext3_iget(sb, ino); 685 if (IS_ERR(inode)) 686 return ERR_CAST(inode); 687 if (generation && inode->i_generation != generation) { 688 iput(inode); 689 return ERR_PTR(-ESTALE); 690 } 691 692 return inode; 693} 694 695static struct dentry *ext3_fh_to_dentry(struct super_block *sb, struct fid *fid, 696 int fh_len, int fh_type) 697{ 698 return generic_fh_to_dentry(sb, fid, fh_len, fh_type, 699 ext3_nfs_get_inode); 700} 701 702static struct dentry *ext3_fh_to_parent(struct super_block *sb, struct fid *fid, 703 int fh_len, int fh_type) 704{ 705 return generic_fh_to_parent(sb, fid, fh_len, fh_type, 706 ext3_nfs_get_inode); 707} 708 709/* 710 * Try to release metadata pages (indirect blocks, directories) which are 711 * mapped via the block device. Since these pages could have journal heads 712 * which would prevent try_to_free_buffers() from freeing them, we must use 713 * jbd layer's try_to_free_buffers() function to release them. 714 */ 715static int bdev_try_to_free_page(struct super_block *sb, struct page *page, 716 gfp_t wait) 717{ 718 journal_t *journal = EXT3_SB(sb)->s_journal; 719 720 WARN_ON(PageChecked(page)); 721 if (!page_has_buffers(page)) 722 return 0; 723 if (journal) 724 return journal_try_to_free_buffers(journal, page, 725 wait & ~__GFP_WAIT); 726 return try_to_free_buffers(page); 727} 728 729#ifdef CONFIG_QUOTA 730#define QTYPE2NAME(t) ((t)==USRQUOTA?"user":"group") 731#define QTYPE2MOPT(on, t) ((t)==USRQUOTA?((on)##USRJQUOTA):((on)##GRPJQUOTA)) 732 733static int ext3_write_dquot(struct dquot *dquot); 734static int ext3_acquire_dquot(struct dquot *dquot); 735static int ext3_release_dquot(struct dquot *dquot); 736static int ext3_mark_dquot_dirty(struct dquot *dquot); 737static int ext3_write_info(struct super_block *sb, int type); 738static int ext3_quota_on(struct super_block *sb, int type, int format_id, 739 char *path); 740static int ext3_quota_on_mount(struct super_block *sb, int type); 741static ssize_t ext3_quota_read(struct super_block *sb, int type, char *data, 742 size_t len, loff_t off); 743static ssize_t ext3_quota_write(struct super_block *sb, int type, 744 const char *data, size_t len, loff_t off); 745 746static const struct dquot_operations ext3_quota_operations = { 747 .write_dquot = ext3_write_dquot, 748 .acquire_dquot = ext3_acquire_dquot, 749 .release_dquot = ext3_release_dquot, 750 .mark_dirty = ext3_mark_dquot_dirty, 751 .write_info = ext3_write_info, 752 .alloc_dquot = dquot_alloc, 753 .destroy_dquot = dquot_destroy, 754}; 755 756static const struct quotactl_ops ext3_qctl_operations = { 757 .quota_on = ext3_quota_on, 758 .quota_off = dquot_quota_off, 759 .quota_sync = dquot_quota_sync, 760 .get_info = dquot_get_dqinfo, 761 .set_info = dquot_set_dqinfo, 762 .get_dqblk = dquot_get_dqblk, 763 .set_dqblk = dquot_set_dqblk 764}; 765#endif 766 767static const struct super_operations ext3_sops = { 768 .alloc_inode = ext3_alloc_inode, 769 .destroy_inode = ext3_destroy_inode, 770 .write_inode = ext3_write_inode, 771 .dirty_inode = ext3_dirty_inode, 772 .evict_inode = ext3_evict_inode, 773 .put_super = ext3_put_super, 774 .sync_fs = ext3_sync_fs, 775 .freeze_fs = ext3_freeze, 776 .unfreeze_fs = ext3_unfreeze, 777 .statfs = ext3_statfs, 778 .remount_fs = ext3_remount, 779 .show_options = ext3_show_options, 780#ifdef CONFIG_QUOTA 781 .quota_read = ext3_quota_read, 782 .quota_write = ext3_quota_write, 783#endif 784 .bdev_try_to_free_page = bdev_try_to_free_page, 785}; 786 787static const struct export_operations ext3_export_ops = { 788 .fh_to_dentry = ext3_fh_to_dentry, 789 .fh_to_parent = ext3_fh_to_parent, 790 .get_parent = ext3_get_parent, 791}; 792 793enum { 794 Opt_bsd_df, Opt_minix_df, Opt_grpid, Opt_nogrpid, 795 Opt_resgid, Opt_resuid, Opt_sb, Opt_err_cont, Opt_err_panic, Opt_err_ro, 796 Opt_nouid32, Opt_nocheck, Opt_debug, Opt_oldalloc, Opt_orlov, 797 Opt_user_xattr, Opt_nouser_xattr, Opt_acl, Opt_noacl, 798 Opt_reservation, Opt_noreservation, Opt_noload, Opt_nobh, Opt_bh, 799 Opt_commit, Opt_journal_update, Opt_journal_inum, Opt_journal_dev, 800 Opt_abort, Opt_data_journal, Opt_data_ordered, Opt_data_writeback, 801 Opt_data_err_abort, Opt_data_err_ignore, 802 Opt_usrjquota, Opt_grpjquota, Opt_offusrjquota, Opt_offgrpjquota, 803 Opt_jqfmt_vfsold, Opt_jqfmt_vfsv0, Opt_jqfmt_vfsv1, Opt_quota, 804 Opt_noquota, Opt_ignore, Opt_barrier, Opt_nobarrier, Opt_err, 805 Opt_resize, Opt_usrquota, Opt_grpquota 806}; 807 808static const match_table_t tokens = { 809 {Opt_bsd_df, "bsddf"}, 810 {Opt_minix_df, "minixdf"}, 811 {Opt_grpid, "grpid"}, 812 {Opt_grpid, "bsdgroups"}, 813 {Opt_nogrpid, "nogrpid"}, 814 {Opt_nogrpid, "sysvgroups"}, 815 {Opt_resgid, "resgid=%u"}, 816 {Opt_resuid, "resuid=%u"}, 817 {Opt_sb, "sb=%u"}, 818 {Opt_err_cont, "errors=continue"}, 819 {Opt_err_panic, "errors=panic"}, 820 {Opt_err_ro, "errors=remount-ro"}, 821 {Opt_nouid32, "nouid32"}, 822 {Opt_nocheck, "nocheck"}, 823 {Opt_nocheck, "check=none"}, 824 {Opt_debug, "debug"}, 825 {Opt_oldalloc, "oldalloc"}, 826 {Opt_orlov, "orlov"}, 827 {Opt_user_xattr, "user_xattr"}, 828 {Opt_nouser_xattr, "nouser_xattr"}, 829 {Opt_acl, "acl"}, 830 {Opt_noacl, "noacl"}, 831 {Opt_reservation, "reservation"}, 832 {Opt_noreservation, "noreservation"}, 833 {Opt_noload, "noload"}, 834 {Opt_noload, "norecovery"}, 835 {Opt_nobh, "nobh"}, 836 {Opt_bh, "bh"}, 837 {Opt_commit, "commit=%u"}, 838 {Opt_journal_update, "journal=update"}, 839 {Opt_journal_inum, "journal=%u"}, 840 {Opt_journal_dev, "journal_dev=%u"}, 841 {Opt_abort, "abort"}, 842 {Opt_data_journal, "data=journal"}, 843 {Opt_data_ordered, "data=ordered"}, 844 {Opt_data_writeback, "data=writeback"}, 845 {Opt_data_err_abort, "data_err=abort"}, 846 {Opt_data_err_ignore, "data_err=ignore"}, 847 {Opt_offusrjquota, "usrjquota="}, 848 {Opt_usrjquota, "usrjquota=%s"}, 849 {Opt_offgrpjquota, "grpjquota="}, 850 {Opt_grpjquota, "grpjquota=%s"}, 851 {Opt_jqfmt_vfsold, "jqfmt=vfsold"}, 852 {Opt_jqfmt_vfsv0, "jqfmt=vfsv0"}, 853 {Opt_jqfmt_vfsv1, "jqfmt=vfsv1"}, 854 {Opt_grpquota, "grpquota"}, 855 {Opt_noquota, "noquota"}, 856 {Opt_quota, "quota"}, 857 {Opt_usrquota, "usrquota"}, 858 {Opt_barrier, "barrier=%u"}, 859 {Opt_barrier, "barrier"}, 860 {Opt_nobarrier, "nobarrier"}, 861 {Opt_resize, "resize"}, 862 {Opt_err, NULL}, 863}; 864 865static ext3_fsblk_t get_sb_block(void **data, struct super_block *sb) 866{ 867 ext3_fsblk_t sb_block; 868 char *options = (char *) *data; 869 870 if (!options || strncmp(options, "sb=", 3) != 0) 871 return 1; /* Default location */ 872 options += 3; 873 /*todo: use simple_strtoll with >32bit ext3 */ 874 sb_block = simple_strtoul(options, &options, 0); 875 if (*options && *options != ',') { 876 ext3_msg(sb, "error: invalid sb specification: %s", 877 (char *) *data); 878 return 1; 879 } 880 if (*options == ',') 881 options++; 882 *data = (void *) options; 883 return sb_block; 884} 885 886#ifdef CONFIG_QUOTA 887static int set_qf_name(struct super_block *sb, int qtype, substring_t *args) 888{ 889 struct ext3_sb_info *sbi = EXT3_SB(sb); 890 char *qname; 891 892 if (sb_any_quota_loaded(sb) && 893 !sbi->s_qf_names[qtype]) { 894 ext3_msg(sb, KERN_ERR, 895 "Cannot change journaled " 896 "quota options when quota turned on"); 897 return 0; 898 } 899 qname = match_strdup(args); 900 if (!qname) { 901 ext3_msg(sb, KERN_ERR, 902 "Not enough memory for storing quotafile name"); 903 return 0; 904 } 905 if (sbi->s_qf_names[qtype] && 906 strcmp(sbi->s_qf_names[qtype], qname)) { 907 ext3_msg(sb, KERN_ERR, 908 "%s quota file already specified", QTYPE2NAME(qtype)); 909 kfree(qname); 910 return 0; 911 } 912 sbi->s_qf_names[qtype] = qname; 913 if (strchr(sbi->s_qf_names[qtype], '/')) { 914 ext3_msg(sb, KERN_ERR, 915 "quotafile must be on filesystem root"); 916 kfree(sbi->s_qf_names[qtype]); 917 sbi->s_qf_names[qtype] = NULL; 918 return 0; 919 } 920 set_opt(sbi->s_mount_opt, QUOTA); 921 return 1; 922} 923 924static int clear_qf_name(struct super_block *sb, int qtype) { 925 926 struct ext3_sb_info *sbi = EXT3_SB(sb); 927 928 if (sb_any_quota_loaded(sb) && 929 sbi->s_qf_names[qtype]) { 930 ext3_msg(sb, KERN_ERR, "Cannot change journaled quota options" 931 " when quota turned on"); 932 return 0; 933 } 934 /* 935 * The space will be released later when all options are confirmed 936 * to be correct 937 */ 938 sbi->s_qf_names[qtype] = NULL; 939 return 1; 940} 941#endif 942 943static int parse_options (char *options, struct super_block *sb, 944 unsigned int *inum, unsigned long *journal_devnum, 945 ext3_fsblk_t *n_blocks_count, int is_remount) 946{ 947 struct ext3_sb_info *sbi = EXT3_SB(sb); 948 char * p; 949 substring_t args[MAX_OPT_ARGS]; 950 int data_opt = 0; 951 int option; 952#ifdef CONFIG_QUOTA 953 int qfmt; 954#endif 955 956 if (!options) 957 return 1; 958 959 while ((p = strsep (&options, ",")) != NULL) { 960 int token; 961 if (!*p) 962 continue; 963 /* 964 * Initialize args struct so we know whether arg was 965 * found; some options take optional arguments. 966 */ 967 args[0].to = args[0].from = 0; 968 token = match_token(p, tokens, args); 969 switch (token) { 970 case Opt_bsd_df: 971 clear_opt (sbi->s_mount_opt, MINIX_DF); 972 break; 973 case Opt_minix_df: 974 set_opt (sbi->s_mount_opt, MINIX_DF); 975 break; 976 case Opt_grpid: 977 set_opt (sbi->s_mount_opt, GRPID); 978 break; 979 case Opt_nogrpid: 980 clear_opt (sbi->s_mount_opt, GRPID); 981 break; 982 case Opt_resuid: 983 if (match_int(&args[0], &option)) 984 return 0; 985 sbi->s_resuid = option; 986 break; 987 case Opt_resgid: 988 if (match_int(&args[0], &option)) 989 return 0; 990 sbi->s_resgid = option; 991 break; 992 case Opt_sb: 993 /* handled by get_sb_block() instead of here */ 994 /* *sb_block = match_int(&args[0]); */ 995 break; 996 case Opt_err_panic: 997 clear_opt (sbi->s_mount_opt, ERRORS_CONT); 998 clear_opt (sbi->s_mount_opt, ERRORS_RO); 999 set_opt (sbi->s_mount_opt, ERRORS_PANIC); 1000 break; 1001 case Opt_err_ro: 1002 clear_opt (sbi->s_mount_opt, ERRORS_CONT); 1003 clear_opt (sbi->s_mount_opt, ERRORS_PANIC); 1004 set_opt (sbi->s_mount_opt, ERRORS_RO); 1005 break; 1006 case Opt_err_cont: 1007 clear_opt (sbi->s_mount_opt, ERRORS_RO); 1008 clear_opt (sbi->s_mount_opt, ERRORS_PANIC); 1009 set_opt (sbi->s_mount_opt, ERRORS_CONT); 1010 break; 1011 case Opt_nouid32: 1012 set_opt (sbi->s_mount_opt, NO_UID32); 1013 break; 1014 case Opt_nocheck: 1015 clear_opt (sbi->s_mount_opt, CHECK); 1016 break; 1017 case Opt_debug: 1018 set_opt (sbi->s_mount_opt, DEBUG); 1019 break; 1020 case Opt_oldalloc: 1021 set_opt (sbi->s_mount_opt, OLDALLOC); 1022 break; 1023 case Opt_orlov: 1024 clear_opt (sbi->s_mount_opt, OLDALLOC); 1025 break; 1026#ifdef CONFIG_EXT3_FS_XATTR 1027 case Opt_user_xattr: 1028 set_opt (sbi->s_mount_opt, XATTR_USER); 1029 break; 1030 case Opt_nouser_xattr: 1031 clear_opt (sbi->s_mount_opt, XATTR_USER); 1032 break; 1033#else 1034 case Opt_user_xattr: 1035 case Opt_nouser_xattr: 1036 ext3_msg(sb, KERN_INFO, 1037 "(no)user_xattr options not supported"); 1038 break; 1039#endif 1040#ifdef CONFIG_EXT3_FS_POSIX_ACL 1041 case Opt_acl: 1042 set_opt(sbi->s_mount_opt, POSIX_ACL); 1043 break; 1044 case Opt_noacl: 1045 clear_opt(sbi->s_mount_opt, POSIX_ACL); 1046 break; 1047#else 1048 case Opt_acl: 1049 case Opt_noacl: 1050 ext3_msg(sb, KERN_INFO, 1051 "(no)acl options not supported"); 1052 break; 1053#endif 1054 case Opt_reservation: 1055 set_opt(sbi->s_mount_opt, RESERVATION); 1056 break; 1057 case Opt_noreservation: 1058 clear_opt(sbi->s_mount_opt, RESERVATION); 1059 break; 1060 case Opt_journal_update: 1061 /* Eventually we will want to be able to create 1062 a journal file here. For now, only allow the 1063 user to specify an existing inode to be the 1064 journal file. */ 1065 if (is_remount) { 1066 ext3_msg(sb, KERN_ERR, "error: cannot specify " 1067 "journal on remount"); 1068 return 0; 1069 } 1070 set_opt (sbi->s_mount_opt, UPDATE_JOURNAL); 1071 break; 1072 case Opt_journal_inum: 1073 if (is_remount) { 1074 ext3_msg(sb, KERN_ERR, "error: cannot specify " 1075 "journal on remount"); 1076 return 0; 1077 } 1078 if (match_int(&args[0], &option)) 1079 return 0; 1080 *inum = option; 1081 break; 1082 case Opt_journal_dev: 1083 if (is_remount) { 1084 ext3_msg(sb, KERN_ERR, "error: cannot specify " 1085 "journal on remount"); 1086 return 0; 1087 } 1088 if (match_int(&args[0], &option)) 1089 return 0; 1090 *journal_devnum = option; 1091 break; 1092 case Opt_noload: 1093 set_opt (sbi->s_mount_opt, NOLOAD); 1094 break; 1095 case Opt_commit: 1096 if (match_int(&args[0], &option)) 1097 return 0; 1098 if (option < 0) 1099 return 0; 1100 if (option == 0) 1101 option = JBD_DEFAULT_MAX_COMMIT_AGE; 1102 sbi->s_commit_interval = HZ * option; 1103 break; 1104 case Opt_data_journal: 1105 data_opt = EXT3_MOUNT_JOURNAL_DATA; 1106 goto datacheck; 1107 case Opt_data_ordered: 1108 data_opt = EXT3_MOUNT_ORDERED_DATA; 1109 goto datacheck; 1110 case Opt_data_writeback: 1111 data_opt = EXT3_MOUNT_WRITEBACK_DATA; 1112 datacheck: 1113 if (is_remount) { 1114 if (test_opt(sb, DATA_FLAGS) == data_opt) 1115 break; 1116 ext3_msg(sb, KERN_ERR, 1117 "error: cannot change " 1118 "data mode on remount. The filesystem " 1119 "is mounted in data=%s mode and you " 1120 "try to remount it in data=%s mode.", 1121 data_mode_string(test_opt(sb, 1122 DATA_FLAGS)), 1123 data_mode_string(data_opt)); 1124 return 0; 1125 } else { 1126 clear_opt(sbi->s_mount_opt, DATA_FLAGS); 1127 sbi->s_mount_opt |= data_opt; 1128 } 1129 break; 1130 case Opt_data_err_abort: 1131 set_opt(sbi->s_mount_opt, DATA_ERR_ABORT); 1132 break; 1133 case Opt_data_err_ignore: 1134 clear_opt(sbi->s_mount_opt, DATA_ERR_ABORT); 1135 break; 1136#ifdef CONFIG_QUOTA 1137 case Opt_usrjquota: 1138 if (!set_qf_name(sb, USRQUOTA, &args[0])) 1139 return 0; 1140 break; 1141 case Opt_grpjquota: 1142 if (!set_qf_name(sb, GRPQUOTA, &args[0])) 1143 return 0; 1144 break; 1145 case Opt_offusrjquota: 1146 if (!clear_qf_name(sb, USRQUOTA)) 1147 return 0; 1148 break; 1149 case Opt_offgrpjquota: 1150 if (!clear_qf_name(sb, GRPQUOTA)) 1151 return 0; 1152 break; 1153 case Opt_jqfmt_vfsold: 1154 qfmt = QFMT_VFS_OLD; 1155 goto set_qf_format; 1156 case Opt_jqfmt_vfsv0: 1157 qfmt = QFMT_VFS_V0; 1158 goto set_qf_format; 1159 case Opt_jqfmt_vfsv1: 1160 qfmt = QFMT_VFS_V1; 1161set_qf_format: 1162 if (sb_any_quota_loaded(sb) && 1163 sbi->s_jquota_fmt != qfmt) { 1164 ext3_msg(sb, KERN_ERR, "error: cannot change " 1165 "journaled quota options when " 1166 "quota turned on."); 1167 return 0; 1168 } 1169 sbi->s_jquota_fmt = qfmt; 1170 break; 1171 case Opt_quota: 1172 case Opt_usrquota: 1173 set_opt(sbi->s_mount_opt, QUOTA); 1174 set_opt(sbi->s_mount_opt, USRQUOTA); 1175 break; 1176 case Opt_grpquota: 1177 set_opt(sbi->s_mount_opt, QUOTA); 1178 set_opt(sbi->s_mount_opt, GRPQUOTA); 1179 break; 1180 case Opt_noquota: 1181 if (sb_any_quota_loaded(sb)) { 1182 ext3_msg(sb, KERN_ERR, "error: cannot change " 1183 "quota options when quota turned on."); 1184 return 0; 1185 } 1186 clear_opt(sbi->s_mount_opt, QUOTA); 1187 clear_opt(sbi->s_mount_opt, USRQUOTA); 1188 clear_opt(sbi->s_mount_opt, GRPQUOTA); 1189 break; 1190#else 1191 case Opt_quota: 1192 case Opt_usrquota: 1193 case Opt_grpquota: 1194 ext3_msg(sb, KERN_ERR, 1195 "error: quota options not supported."); 1196 break; 1197 case Opt_usrjquota: 1198 case Opt_grpjquota: 1199 case Opt_offusrjquota: 1200 case Opt_offgrpjquota: 1201 case Opt_jqfmt_vfsold: 1202 case Opt_jqfmt_vfsv0: 1203 case Opt_jqfmt_vfsv1: 1204 ext3_msg(sb, KERN_ERR, 1205 "error: journaled quota options not " 1206 "supported."); 1207 break; 1208 case Opt_noquota: 1209 break; 1210#endif 1211 case Opt_abort: 1212 set_opt(sbi->s_mount_opt, ABORT); 1213 break; 1214 case Opt_nobarrier: 1215 clear_opt(sbi->s_mount_opt, BARRIER); 1216 break; 1217 case Opt_barrier: 1218 if (args[0].from) { 1219 if (match_int(&args[0], &option)) 1220 return 0; 1221 } else 1222 option = 1; /* No argument, default to 1 */ 1223 if (option) 1224 set_opt(sbi->s_mount_opt, BARRIER); 1225 else 1226 clear_opt(sbi->s_mount_opt, BARRIER); 1227 break; 1228 case Opt_ignore: 1229 break; 1230 case Opt_resize: 1231 if (!is_remount) { 1232 ext3_msg(sb, KERN_ERR, 1233 "error: resize option only available " 1234 "for remount"); 1235 return 0; 1236 } 1237 if (match_int(&args[0], &option) != 0) 1238 return 0; 1239 *n_blocks_count = option; 1240 break; 1241 case Opt_nobh: 1242 ext3_msg(sb, KERN_WARNING, 1243 "warning: ignoring deprecated nobh option"); 1244 break; 1245 case Opt_bh: 1246 ext3_msg(sb, KERN_WARNING, 1247 "warning: ignoring deprecated bh option"); 1248 break; 1249 default: 1250 ext3_msg(sb, KERN_ERR, 1251 "error: unrecognized mount option \"%s\" " 1252 "or missing value", p); 1253 return 0; 1254 } 1255 } 1256#ifdef CONFIG_QUOTA 1257 if (sbi->s_qf_names[USRQUOTA] || sbi->s_qf_names[GRPQUOTA]) { 1258 if (test_opt(sb, USRQUOTA) && sbi->s_qf_names[USRQUOTA]) 1259 clear_opt(sbi->s_mount_opt, USRQUOTA); 1260 if (test_opt(sb, GRPQUOTA) && sbi->s_qf_names[GRPQUOTA]) 1261 clear_opt(sbi->s_mount_opt, GRPQUOTA); 1262 1263 if (test_opt(sb, GRPQUOTA) || test_opt(sb, USRQUOTA)) { 1264 ext3_msg(sb, KERN_ERR, "error: old and new quota " 1265 "format mixing."); 1266 return 0; 1267 } 1268 1269 if (!sbi->s_jquota_fmt) { 1270 ext3_msg(sb, KERN_ERR, "error: journaled quota format " 1271 "not specified."); 1272 return 0; 1273 } 1274 } else { 1275 if (sbi->s_jquota_fmt) { 1276 ext3_msg(sb, KERN_ERR, "error: journaled quota format " 1277 "specified with no journaling " 1278 "enabled."); 1279 return 0; 1280 } 1281 } 1282#endif 1283 return 1; 1284} 1285 1286static int ext3_setup_super(struct super_block *sb, struct ext3_super_block *es, 1287 int read_only) 1288{ 1289 struct ext3_sb_info *sbi = EXT3_SB(sb); 1290 int res = 0; 1291 1292 if (le32_to_cpu(es->s_rev_level) > EXT3_MAX_SUPP_REV) { 1293 ext3_msg(sb, KERN_ERR, 1294 "error: revision level too high, " 1295 "forcing read-only mode"); 1296 res = MS_RDONLY; 1297 } 1298 if (read_only) 1299 return res; 1300 if (!(sbi->s_mount_state & EXT3_VALID_FS)) 1301 ext3_msg(sb, KERN_WARNING, 1302 "warning: mounting unchecked fs, " 1303 "running e2fsck is recommended"); 1304 else if ((sbi->s_mount_state & EXT3_ERROR_FS)) 1305 ext3_msg(sb, KERN_WARNING, 1306 "warning: mounting fs with errors, " 1307 "running e2fsck is recommended"); 1308 else if ((__s16) le16_to_cpu(es->s_max_mnt_count) >= 0 && 1309 le16_to_cpu(es->s_mnt_count) >= 1310 (unsigned short) (__s16) le16_to_cpu(es->s_max_mnt_count)) 1311 ext3_msg(sb, KERN_WARNING, 1312 "warning: maximal mount count reached, " 1313 "running e2fsck is recommended"); 1314 else if (le32_to_cpu(es->s_checkinterval) && 1315 (le32_to_cpu(es->s_lastcheck) + 1316 le32_to_cpu(es->s_checkinterval) <= get_seconds())) 1317 ext3_msg(sb, KERN_WARNING, 1318 "warning: checktime reached, " 1319 "running e2fsck is recommended"); 1320 if (!(__s16) le16_to_cpu(es->s_max_mnt_count)) 1321 es->s_max_mnt_count = cpu_to_le16(EXT3_DFL_MAX_MNT_COUNT); 1322 le16_add_cpu(&es->s_mnt_count, 1); 1323 es->s_mtime = cpu_to_le32(get_seconds()); 1324 ext3_update_dynamic_rev(sb); 1325 EXT3_SET_INCOMPAT_FEATURE(sb, EXT3_FEATURE_INCOMPAT_RECOVER); 1326 1327 ext3_commit_super(sb, es, 1); 1328 if (test_opt(sb, DEBUG)) 1329 ext3_msg(sb, KERN_INFO, "[bs=%lu, gc=%lu, " 1330 "bpg=%lu, ipg=%lu, mo=%04lx]", 1331 sb->s_blocksize, 1332 sbi->s_groups_count, 1333 EXT3_BLOCKS_PER_GROUP(sb), 1334 EXT3_INODES_PER_GROUP(sb), 1335 sbi->s_mount_opt); 1336 1337 if (EXT3_SB(sb)->s_journal->j_inode == NULL) { 1338 char b[BDEVNAME_SIZE]; 1339 ext3_msg(sb, KERN_INFO, "using external journal on %s", 1340 bdevname(EXT3_SB(sb)->s_journal->j_dev, b)); 1341 } else { 1342 ext3_msg(sb, KERN_INFO, "using internal journal"); 1343 } 1344 return res; 1345} 1346 1347/* Called at mount-time, super-block is locked */ 1348static int ext3_check_descriptors(struct super_block *sb) 1349{ 1350 struct ext3_sb_info *sbi = EXT3_SB(sb); 1351 int i; 1352 1353 ext3_debug ("Checking group descriptors"); 1354 1355 for (i = 0; i < sbi->s_groups_count; i++) { 1356 struct ext3_group_desc *gdp = ext3_get_group_desc(sb, i, NULL); 1357 ext3_fsblk_t first_block = ext3_group_first_block_no(sb, i); 1358 ext3_fsblk_t last_block; 1359 1360 if (i == sbi->s_groups_count - 1) 1361 last_block = le32_to_cpu(sbi->s_es->s_blocks_count) - 1; 1362 else 1363 last_block = first_block + 1364 (EXT3_BLOCKS_PER_GROUP(sb) - 1); 1365 1366 if (le32_to_cpu(gdp->bg_block_bitmap) < first_block || 1367 le32_to_cpu(gdp->bg_block_bitmap) > last_block) 1368 { 1369 ext3_error (sb, "ext3_check_descriptors", 1370 "Block bitmap for group %d" 1371 " not in group (block %lu)!", 1372 i, (unsigned long) 1373 le32_to_cpu(gdp->bg_block_bitmap)); 1374 return 0; 1375 } 1376 if (le32_to_cpu(gdp->bg_inode_bitmap) < first_block || 1377 le32_to_cpu(gdp->bg_inode_bitmap) > last_block) 1378 { 1379 ext3_error (sb, "ext3_check_descriptors", 1380 "Inode bitmap for group %d" 1381 " not in group (block %lu)!", 1382 i, (unsigned long) 1383 le32_to_cpu(gdp->bg_inode_bitmap)); 1384 return 0; 1385 } 1386 if (le32_to_cpu(gdp->bg_inode_table) < first_block || 1387 le32_to_cpu(gdp->bg_inode_table) + sbi->s_itb_per_group - 1 > 1388 last_block) 1389 { 1390 ext3_error (sb, "ext3_check_descriptors", 1391 "Inode table for group %d" 1392 " not in group (block %lu)!", 1393 i, (unsigned long) 1394 le32_to_cpu(gdp->bg_inode_table)); 1395 return 0; 1396 } 1397 } 1398 1399 sbi->s_es->s_free_blocks_count=cpu_to_le32(ext3_count_free_blocks(sb)); 1400 sbi->s_es->s_free_inodes_count=cpu_to_le32(ext3_count_free_inodes(sb)); 1401 return 1; 1402} 1403 1404 1405/* ext3_orphan_cleanup() walks a singly-linked list of inodes (starting at 1406 * the superblock) which were deleted from all directories, but held open by 1407 * a process at the time of a crash. We walk the list and try to delete these 1408 * inodes at recovery time (only with a read-write filesystem). 1409 * 1410 * In order to keep the orphan inode chain consistent during traversal (in 1411 * case of crash during recovery), we link each inode into the superblock 1412 * orphan list_head and handle it the same way as an inode deletion during 1413 * normal operation (which journals the operations for us). 1414 * 1415 * We only do an iget() and an iput() on each inode, which is very safe if we 1416 * accidentally point at an in-use or already deleted inode. The worst that 1417 * can happen in this case is that we get a "bit already cleared" message from 1418 * ext3_free_inode(). The only reason we would point at a wrong inode is if 1419 * e2fsck was run on this filesystem, and it must have already done the orphan 1420 * inode cleanup for us, so we can safely abort without any further action. 1421 */ 1422static void ext3_orphan_cleanup (struct super_block * sb, 1423 struct ext3_super_block * es) 1424{ 1425 unsigned int s_flags = sb->s_flags; 1426 int nr_orphans = 0, nr_truncates = 0; 1427#ifdef CONFIG_QUOTA 1428 int i; 1429#endif 1430 if (!es->s_last_orphan) { 1431 jbd_debug(4, "no orphan inodes to clean up\n"); 1432 return; 1433 } 1434 1435 if (bdev_read_only(sb->s_bdev)) { 1436 ext3_msg(sb, KERN_ERR, "error: write access " 1437 "unavailable, skipping orphan cleanup."); 1438 return; 1439 } 1440 1441 if (EXT3_SB(sb)->s_mount_state & EXT3_ERROR_FS) { 1442 if (es->s_last_orphan) 1443 jbd_debug(1, "Errors on filesystem, " 1444 "clearing orphan list.\n"); 1445 es->s_last_orphan = 0; 1446 jbd_debug(1, "Skipping orphan recovery on fs with errors.\n"); 1447 return; 1448 } 1449 1450 if (s_flags & MS_RDONLY) { 1451 ext3_msg(sb, KERN_INFO, "orphan cleanup on readonly fs"); 1452 sb->s_flags &= ~MS_RDONLY; 1453 } 1454#ifdef CONFIG_QUOTA 1455 /* Needed for iput() to work correctly and not trash data */ 1456 sb->s_flags |= MS_ACTIVE; 1457 /* Turn on quotas so that they are updated correctly */ 1458 for (i = 0; i < MAXQUOTAS; i++) { 1459 if (EXT3_SB(sb)->s_qf_names[i]) { 1460 int ret = ext3_quota_on_mount(sb, i); 1461 if (ret < 0) 1462 ext3_msg(sb, KERN_ERR, 1463 "error: cannot turn on journaled " 1464 "quota: %d", ret); 1465 } 1466 } 1467#endif 1468 1469 while (es->s_last_orphan) { 1470 struct inode *inode; 1471 1472 inode = ext3_orphan_get(sb, le32_to_cpu(es->s_last_orphan)); 1473 if (IS_ERR(inode)) { 1474 es->s_last_orphan = 0; 1475 break; 1476 } 1477 1478 list_add(&EXT3_I(inode)->i_orphan, &EXT3_SB(sb)->s_orphan); 1479 dquot_initialize(inode); 1480 if (inode->i_nlink) { 1481 printk(KERN_DEBUG 1482 "%s: truncating inode %lu to %Ld bytes\n", 1483 __func__, inode->i_ino, inode->i_size); 1484 jbd_debug(2, "truncating inode %lu to %Ld bytes\n", 1485 inode->i_ino, inode->i_size); 1486 ext3_truncate(inode); 1487 nr_truncates++; 1488 } else { 1489 printk(KERN_DEBUG 1490 "%s: deleting unreferenced inode %lu\n", 1491 __func__, inode->i_ino); 1492 jbd_debug(2, "deleting unreferenced inode %lu\n", 1493 inode->i_ino); 1494 nr_orphans++; 1495 } 1496 iput(inode); /* The delete magic happens here! */ 1497 } 1498 1499#define PLURAL(x) (x), ((x)==1) ? "" : "s" 1500 1501 if (nr_orphans) 1502 ext3_msg(sb, KERN_INFO, "%d orphan inode%s deleted", 1503 PLURAL(nr_orphans)); 1504 if (nr_truncates) 1505 ext3_msg(sb, KERN_INFO, "%d truncate%s cleaned up", 1506 PLURAL(nr_truncates)); 1507#ifdef CONFIG_QUOTA 1508 /* Turn quotas off */ 1509 for (i = 0; i < MAXQUOTAS; i++) { 1510 if (sb_dqopt(sb)->files[i]) 1511 dquot_quota_off(sb, i); 1512 } 1513#endif 1514 sb->s_flags = s_flags; /* Restore MS_RDONLY status */ 1515} 1516 1517/* 1518 * Maximal file size. There is a direct, and {,double-,triple-}indirect 1519 * block limit, and also a limit of (2^32 - 1) 512-byte sectors in i_blocks. 1520 * We need to be 1 filesystem block less than the 2^32 sector limit. 1521 */ 1522static loff_t ext3_max_size(int bits) 1523{ 1524 loff_t res = EXT3_NDIR_BLOCKS; 1525 int meta_blocks; 1526 loff_t upper_limit; 1527 1528 /* This is calculated to be the largest file size for a 1529 * dense, file such that the total number of 1530 * sectors in the file, including data and all indirect blocks, 1531 * does not exceed 2^32 -1 1532 * __u32 i_blocks representing the total number of 1533 * 512 bytes blocks of the file 1534 */ 1535 upper_limit = (1LL << 32) - 1; 1536 1537 /* total blocks in file system block size */ 1538 upper_limit >>= (bits - 9); 1539 1540 1541 /* indirect blocks */ 1542 meta_blocks = 1; 1543 /* double indirect blocks */ 1544 meta_blocks += 1 + (1LL << (bits-2)); 1545 /* tripple indirect blocks */ 1546 meta_blocks += 1 + (1LL << (bits-2)) + (1LL << (2*(bits-2))); 1547 1548 upper_limit -= meta_blocks; 1549 upper_limit <<= bits; 1550 1551 res += 1LL << (bits-2); 1552 res += 1LL << (2*(bits-2)); 1553 res += 1LL << (3*(bits-2)); 1554 res <<= bits; 1555 if (res > upper_limit) 1556 res = upper_limit; 1557 1558 if (res > MAX_LFS_FILESIZE) 1559 res = MAX_LFS_FILESIZE; 1560 1561 return res; 1562} 1563 1564static ext3_fsblk_t descriptor_loc(struct super_block *sb, 1565 ext3_fsblk_t logic_sb_block, 1566 int nr) 1567{ 1568 struct ext3_sb_info *sbi = EXT3_SB(sb); 1569 unsigned long bg, first_meta_bg; 1570 int has_super = 0; 1571 1572 first_meta_bg = le32_to_cpu(sbi->s_es->s_first_meta_bg); 1573 1574 if (!EXT3_HAS_INCOMPAT_FEATURE(sb, EXT3_FEATURE_INCOMPAT_META_BG) || 1575 nr < first_meta_bg) 1576 return (logic_sb_block + nr + 1); 1577 bg = sbi->s_desc_per_block * nr; 1578 if (ext3_bg_has_super(sb, bg)) 1579 has_super = 1; 1580 return (has_super + ext3_group_first_block_no(sb, bg)); 1581} 1582 1583 1584static int ext3_fill_super (struct super_block *sb, void *data, int silent) 1585{ 1586 struct buffer_head * bh; 1587 struct ext3_super_block *es = NULL; 1588 struct ext3_sb_info *sbi; 1589 ext3_fsblk_t block; 1590 ext3_fsblk_t sb_block = get_sb_block(&data, sb); 1591 ext3_fsblk_t logic_sb_block; 1592 unsigned long offset = 0; 1593 unsigned int journal_inum = 0; 1594 unsigned long journal_devnum = 0; 1595 unsigned long def_mount_opts; 1596 struct inode *root; 1597 int blocksize; 1598 int hblock; 1599 int db_count; 1600 int i; 1601 int needs_recovery; 1602 int ret = -EINVAL; 1603 __le32 features; 1604 int err; 1605 1606 sbi = kzalloc(sizeof(*sbi), GFP_KERNEL); 1607 if (!sbi) 1608 return -ENOMEM; 1609 1610 sbi->s_blockgroup_lock = 1611 kzalloc(sizeof(struct blockgroup_lock), GFP_KERNEL); 1612 if (!sbi->s_blockgroup_lock) { 1613 kfree(sbi); 1614 return -ENOMEM; 1615 } 1616 sb->s_fs_info = sbi; 1617 sbi->s_mount_opt = 0; 1618 sbi->s_resuid = EXT3_DEF_RESUID; 1619 sbi->s_resgid = EXT3_DEF_RESGID; 1620 sbi->s_sb_block = sb_block; 1621 1622 unlock_kernel(); 1623 1624 blocksize = sb_min_blocksize(sb, EXT3_MIN_BLOCK_SIZE); 1625 if (!blocksize) { 1626 ext3_msg(sb, KERN_ERR, "error: unable to set blocksize"); 1627 goto out_fail; 1628 } 1629 1630 /* 1631 * The ext3 superblock will not be buffer aligned for other than 1kB 1632 * block sizes. We need to calculate the offset from buffer start. 1633 */ 1634 if (blocksize != EXT3_MIN_BLOCK_SIZE) { 1635 logic_sb_block = (sb_block * EXT3_MIN_BLOCK_SIZE) / blocksize; 1636 offset = (sb_block * EXT3_MIN_BLOCK_SIZE) % blocksize; 1637 } else { 1638 logic_sb_block = sb_block; 1639 } 1640 1641 if (!(bh = sb_bread(sb, logic_sb_block))) { 1642 ext3_msg(sb, KERN_ERR, "error: unable to read superblock"); 1643 goto out_fail; 1644 } 1645 /* 1646 * Note: s_es must be initialized as soon as possible because 1647 * some ext3 macro-instructions depend on its value 1648 */ 1649 es = (struct ext3_super_block *) (((char *)bh->b_data) + offset); 1650 sbi->s_es = es; 1651 sb->s_magic = le16_to_cpu(es->s_magic); 1652 if (sb->s_magic != EXT3_SUPER_MAGIC) 1653 goto cantfind_ext3; 1654 1655 /* Set defaults before we parse the mount options */ 1656 def_mount_opts = le32_to_cpu(es->s_default_mount_opts); 1657 if (def_mount_opts & EXT3_DEFM_DEBUG) 1658 set_opt(sbi->s_mount_opt, DEBUG); 1659 if (def_mount_opts & EXT3_DEFM_BSDGROUPS) 1660 set_opt(sbi->s_mount_opt, GRPID); 1661 if (def_mount_opts & EXT3_DEFM_UID16) 1662 set_opt(sbi->s_mount_opt, NO_UID32); 1663#ifdef CONFIG_EXT3_FS_XATTR 1664 if (def_mount_opts & EXT3_DEFM_XATTR_USER) 1665 set_opt(sbi->s_mount_opt, XATTR_USER); 1666#endif 1667#ifdef CONFIG_EXT3_FS_POSIX_ACL 1668 if (def_mount_opts & EXT3_DEFM_ACL) 1669 set_opt(sbi->s_mount_opt, POSIX_ACL); 1670#endif 1671 if ((def_mount_opts & EXT3_DEFM_JMODE) == EXT3_DEFM_JMODE_DATA) 1672 set_opt(sbi->s_mount_opt, JOURNAL_DATA); 1673 else if ((def_mount_opts & EXT3_DEFM_JMODE) == EXT3_DEFM_JMODE_ORDERED) 1674 set_opt(sbi->s_mount_opt, ORDERED_DATA); 1675 else if ((def_mount_opts & EXT3_DEFM_JMODE) == EXT3_DEFM_JMODE_WBACK) 1676 set_opt(sbi->s_mount_opt, WRITEBACK_DATA); 1677 1678 if (le16_to_cpu(sbi->s_es->s_errors) == EXT3_ERRORS_PANIC) 1679 set_opt(sbi->s_mount_opt, ERRORS_PANIC); 1680 else if (le16_to_cpu(sbi->s_es->s_errors) == EXT3_ERRORS_CONTINUE) 1681 set_opt(sbi->s_mount_opt, ERRORS_CONT); 1682 else 1683 set_opt(sbi->s_mount_opt, ERRORS_RO); 1684 1685 sbi->s_resuid = le16_to_cpu(es->s_def_resuid); 1686 sbi->s_resgid = le16_to_cpu(es->s_def_resgid); 1687 1688 set_opt(sbi->s_mount_opt, RESERVATION); 1689 1690 if (!parse_options ((char *) data, sb, &journal_inum, &journal_devnum, 1691 NULL, 0)) 1692 goto failed_mount; 1693 1694 sb->s_flags = (sb->s_flags & ~MS_POSIXACL) | 1695 (test_opt(sb, POSIX_ACL) ? MS_POSIXACL : 0); 1696 1697 if (le32_to_cpu(es->s_rev_level) == EXT3_GOOD_OLD_REV && 1698 (EXT3_HAS_COMPAT_FEATURE(sb, ~0U) || 1699 EXT3_HAS_RO_COMPAT_FEATURE(sb, ~0U) || 1700 EXT3_HAS_INCOMPAT_FEATURE(sb, ~0U))) 1701 ext3_msg(sb, KERN_WARNING, 1702 "warning: feature flags set on rev 0 fs, " 1703 "running e2fsck is recommended"); 1704 /* 1705 * Check feature flags regardless of the revision level, since we 1706 * previously didn't change the revision level when setting the flags, 1707 * so there is a chance incompat flags are set on a rev 0 filesystem. 1708 */ 1709 features = EXT3_HAS_INCOMPAT_FEATURE(sb, ~EXT3_FEATURE_INCOMPAT_SUPP); 1710 if (features) { 1711 ext3_msg(sb, KERN_ERR, 1712 "error: couldn't mount because of unsupported " 1713 "optional features (%x)", le32_to_cpu(features)); 1714 goto failed_mount; 1715 } 1716 features = EXT3_HAS_RO_COMPAT_FEATURE(sb, ~EXT3_FEATURE_RO_COMPAT_SUPP); 1717 if (!(sb->s_flags & MS_RDONLY) && features) { 1718 ext3_msg(sb, KERN_ERR, 1719 "error: couldn't mount RDWR because of unsupported " 1720 "optional features (%x)", le32_to_cpu(features)); 1721 goto failed_mount; 1722 } 1723 blocksize = BLOCK_SIZE << le32_to_cpu(es->s_log_block_size); 1724 1725 if (blocksize < EXT3_MIN_BLOCK_SIZE || 1726 blocksize > EXT3_MAX_BLOCK_SIZE) { 1727 ext3_msg(sb, KERN_ERR, 1728 "error: couldn't mount because of unsupported " 1729 "filesystem blocksize %d", blocksize); 1730 goto failed_mount; 1731 } 1732 1733 hblock = bdev_logical_block_size(sb->s_bdev); 1734 if (sb->s_blocksize != blocksize) { 1735 /* 1736 * Make sure the blocksize for the filesystem is larger 1737 * than the hardware sectorsize for the machine. 1738 */ 1739 if (blocksize < hblock) { 1740 ext3_msg(sb, KERN_ERR, 1741 "error: fsblocksize %d too small for " 1742 "hardware sectorsize %d", blocksize, hblock); 1743 goto failed_mount; 1744 } 1745 1746 brelse (bh); 1747 if (!sb_set_blocksize(sb, blocksize)) { 1748 ext3_msg(sb, KERN_ERR, 1749 "error: bad blocksize %d", blocksize); 1750 goto out_fail; 1751 } 1752 logic_sb_block = (sb_block * EXT3_MIN_BLOCK_SIZE) / blocksize; 1753 offset = (sb_block * EXT3_MIN_BLOCK_SIZE) % blocksize; 1754 bh = sb_bread(sb, logic_sb_block); 1755 if (!bh) { 1756 ext3_msg(sb, KERN_ERR, 1757 "error: can't read superblock on 2nd try"); 1758 goto failed_mount; 1759 } 1760 es = (struct ext3_super_block *)(((char *)bh->b_data) + offset); 1761 sbi->s_es = es; 1762 if (es->s_magic != cpu_to_le16(EXT3_SUPER_MAGIC)) { 1763 ext3_msg(sb, KERN_ERR, 1764 "error: magic mismatch"); 1765 goto failed_mount; 1766 } 1767 } 1768 1769 sb->s_maxbytes = ext3_max_size(sb->s_blocksize_bits); 1770 1771 if (le32_to_cpu(es->s_rev_level) == EXT3_GOOD_OLD_REV) { 1772 sbi->s_inode_size = EXT3_GOOD_OLD_INODE_SIZE; 1773 sbi->s_first_ino = EXT3_GOOD_OLD_FIRST_INO; 1774 } else { 1775 sbi->s_inode_size = le16_to_cpu(es->s_inode_size); 1776 sbi->s_first_ino = le32_to_cpu(es->s_first_ino); 1777 if ((sbi->s_inode_size < EXT3_GOOD_OLD_INODE_SIZE) || 1778 (!is_power_of_2(sbi->s_inode_size)) || 1779 (sbi->s_inode_size > blocksize)) { 1780 ext3_msg(sb, KERN_ERR, 1781 "error: unsupported inode size: %d", 1782 sbi->s_inode_size); 1783 goto failed_mount; 1784 } 1785 } 1786 sbi->s_frag_size = EXT3_MIN_FRAG_SIZE << 1787 le32_to_cpu(es->s_log_frag_size); 1788 if (blocksize != sbi->s_frag_size) { 1789 ext3_msg(sb, KERN_ERR, 1790 "error: fragsize %lu != blocksize %u (unsupported)", 1791 sbi->s_frag_size, blocksize); 1792 goto failed_mount; 1793 } 1794 sbi->s_frags_per_block = 1; 1795 sbi->s_blocks_per_group = le32_to_cpu(es->s_blocks_per_group); 1796 sbi->s_frags_per_group = le32_to_cpu(es->s_frags_per_group); 1797 sbi->s_inodes_per_group = le32_to_cpu(es->s_inodes_per_group); 1798 if (EXT3_INODE_SIZE(sb) == 0 || EXT3_INODES_PER_GROUP(sb) == 0) 1799 goto cantfind_ext3; 1800 sbi->s_inodes_per_block = blocksize / EXT3_INODE_SIZE(sb); 1801 if (sbi->s_inodes_per_block == 0) 1802 goto cantfind_ext3; 1803 sbi->s_itb_per_group = sbi->s_inodes_per_group / 1804 sbi->s_inodes_per_block; 1805 sbi->s_desc_per_block = blocksize / sizeof(struct ext3_group_desc); 1806 sbi->s_sbh = bh; 1807 sbi->s_mount_state = le16_to_cpu(es->s_state); 1808 sbi->s_addr_per_block_bits = ilog2(EXT3_ADDR_PER_BLOCK(sb)); 1809 sbi->s_desc_per_block_bits = ilog2(EXT3_DESC_PER_BLOCK(sb)); 1810 for (i=0; i < 4; i++) 1811 sbi->s_hash_seed[i] = le32_to_cpu(es->s_hash_seed[i]); 1812 sbi->s_def_hash_version = es->s_def_hash_version; 1813 i = le32_to_cpu(es->s_flags); 1814 if (i & EXT2_FLAGS_UNSIGNED_HASH) 1815 sbi->s_hash_unsigned = 3; 1816 else if ((i & EXT2_FLAGS_SIGNED_HASH) == 0) { 1817#ifdef __CHAR_UNSIGNED__ 1818 es->s_flags |= cpu_to_le32(EXT2_FLAGS_UNSIGNED_HASH); 1819 sbi->s_hash_unsigned = 3; 1820#else 1821 es->s_flags |= cpu_to_le32(EXT2_FLAGS_SIGNED_HASH); 1822#endif 1823 } 1824 1825 if (sbi->s_blocks_per_group > blocksize * 8) { 1826 ext3_msg(sb, KERN_ERR, 1827 "#blocks per group too big: %lu", 1828 sbi->s_blocks_per_group); 1829 goto failed_mount; 1830 } 1831 if (sbi->s_frags_per_group > blocksize * 8) { 1832 ext3_msg(sb, KERN_ERR, 1833 "error: #fragments per group too big: %lu", 1834 sbi->s_frags_per_group); 1835 goto failed_mount; 1836 } 1837 if (sbi->s_inodes_per_group > blocksize * 8) { 1838 ext3_msg(sb, KERN_ERR, 1839 "error: #inodes per group too big: %lu", 1840 sbi->s_inodes_per_group); 1841 goto failed_mount; 1842 } 1843 1844 if (le32_to_cpu(es->s_blocks_count) > 1845 (sector_t)(~0ULL) >> (sb->s_blocksize_bits - 9)) { 1846 ext3_msg(sb, KERN_ERR, 1847 "error: filesystem is too large to mount safely"); 1848 if (sizeof(sector_t) < 8) 1849 ext3_msg(sb, KERN_ERR, 1850 "error: CONFIG_LBDAF not enabled"); 1851 goto failed_mount; 1852 } 1853 1854 if (EXT3_BLOCKS_PER_GROUP(sb) == 0) 1855 goto cantfind_ext3; 1856 sbi->s_groups_count = ((le32_to_cpu(es->s_blocks_count) - 1857 le32_to_cpu(es->s_first_data_block) - 1) 1858 / EXT3_BLOCKS_PER_GROUP(sb)) + 1; 1859 db_count = (sbi->s_groups_count + EXT3_DESC_PER_BLOCK(sb) - 1) / 1860 EXT3_DESC_PER_BLOCK(sb); 1861 sbi->s_group_desc = kmalloc(db_count * sizeof (struct buffer_head *), 1862 GFP_KERNEL); 1863 if (sbi->s_group_desc == NULL) { 1864 ext3_msg(sb, KERN_ERR, 1865 "error: not enough memory"); 1866 goto failed_mount; 1867 } 1868 1869 bgl_lock_init(sbi->s_blockgroup_lock); 1870 1871 for (i = 0; i < db_count; i++) { 1872 block = descriptor_loc(sb, logic_sb_block, i); 1873 sbi->s_group_desc[i] = sb_bread(sb, block); 1874 if (!sbi->s_group_desc[i]) { 1875 ext3_msg(sb, KERN_ERR, 1876 "error: can't read group descriptor %d", i); 1877 db_count = i; 1878 goto failed_mount2; 1879 } 1880 } 1881 if (!ext3_check_descriptors (sb)) { 1882 ext3_msg(sb, KERN_ERR, 1883 "error: group descriptors corrupted"); 1884 goto failed_mount2; 1885 } 1886 sbi->s_gdb_count = db_count; 1887 get_random_bytes(&sbi->s_next_generation, sizeof(u32)); 1888 spin_lock_init(&sbi->s_next_gen_lock); 1889 1890 /* per fileystem reservation list head & lock */ 1891 spin_lock_init(&sbi->s_rsv_window_lock); 1892 sbi->s_rsv_window_root = RB_ROOT; 1893 /* Add a single, static dummy reservation to the start of the 1894 * reservation window list --- it gives us a placeholder for 1895 * append-at-start-of-list which makes the allocation logic 1896 * _much_ simpler. */ 1897 sbi->s_rsv_window_head.rsv_start = EXT3_RESERVE_WINDOW_NOT_ALLOCATED; 1898 sbi->s_rsv_window_head.rsv_end = EXT3_RESERVE_WINDOW_NOT_ALLOCATED; 1899 sbi->s_rsv_window_head.rsv_alloc_hit = 0; 1900 sbi->s_rsv_window_head.rsv_goal_size = 0; 1901 ext3_rsv_window_add(sb, &sbi->s_rsv_window_head); 1902 1903 /* 1904 * set up enough so that it can read an inode 1905 */ 1906 sb->s_op = &ext3_sops; 1907 sb->s_export_op = &ext3_export_ops; 1908 sb->s_xattr = ext3_xattr_handlers; 1909#ifdef CONFIG_QUOTA 1910 sb->s_qcop = &ext3_qctl_operations; 1911 sb->dq_op = &ext3_quota_operations; 1912#endif 1913 INIT_LIST_HEAD(&sbi->s_orphan); /* unlinked but open files */ 1914 mutex_init(&sbi->s_orphan_lock); 1915 mutex_init(&sbi->s_resize_lock); 1916 1917 sb->s_root = NULL; 1918 1919 needs_recovery = (es->s_last_orphan != 0 || 1920 EXT3_HAS_INCOMPAT_FEATURE(sb, 1921 EXT3_FEATURE_INCOMPAT_RECOVER)); 1922 1923 /* 1924 * The first inode we look at is the journal inode. Don't try 1925 * root first: it may be modified in the journal! 1926 */ 1927 if (!test_opt(sb, NOLOAD) && 1928 EXT3_HAS_COMPAT_FEATURE(sb, EXT3_FEATURE_COMPAT_HAS_JOURNAL)) { 1929 if (ext3_load_journal(sb, es, journal_devnum)) 1930 goto failed_mount2; 1931 } else if (journal_inum) { 1932 if (ext3_create_journal(sb, es, journal_inum)) 1933 goto failed_mount2; 1934 } else { 1935 if (!silent) 1936 ext3_msg(sb, KERN_ERR, 1937 "error: no journal found. " 1938 "mounting ext3 over ext2?"); 1939 goto failed_mount2; 1940 } 1941 err = percpu_counter_init(&sbi->s_freeblocks_counter, 1942 ext3_count_free_blocks(sb)); 1943 if (!err) { 1944 err = percpu_counter_init(&sbi->s_freeinodes_counter, 1945 ext3_count_free_inodes(sb)); 1946 } 1947 if (!err) { 1948 err = percpu_counter_init(&sbi->s_dirs_counter, 1949 ext3_count_dirs(sb)); 1950 } 1951 if (err) { 1952 ext3_msg(sb, KERN_ERR, "error: insufficient memory"); 1953 goto failed_mount3; 1954 } 1955 1956 /* We have now updated the journal if required, so we can 1957 * validate the data journaling mode. */ 1958 switch (test_opt(sb, DATA_FLAGS)) { 1959 case 0: 1960 /* No mode set, assume a default based on the journal 1961 capabilities: ORDERED_DATA if the journal can 1962 cope, else JOURNAL_DATA */ 1963 if (journal_check_available_features 1964 (sbi->s_journal, 0, 0, JFS_FEATURE_INCOMPAT_REVOKE)) 1965 set_opt(sbi->s_mount_opt, DEFAULT_DATA_MODE); 1966 else 1967 set_opt(sbi->s_mount_opt, JOURNAL_DATA); 1968 break; 1969 1970 case EXT3_MOUNT_ORDERED_DATA: 1971 case EXT3_MOUNT_WRITEBACK_DATA: 1972 if (!journal_check_available_features 1973 (sbi->s_journal, 0, 0, JFS_FEATURE_INCOMPAT_REVOKE)) { 1974 ext3_msg(sb, KERN_ERR, 1975 "error: journal does not support " 1976 "requested data journaling mode"); 1977 goto failed_mount3; 1978 } 1979 default: 1980 break; 1981 } 1982 1983 /* 1984 * The journal_load will have done any necessary log recovery, 1985 * so we can safely mount the rest of the filesystem now. 1986 */ 1987 1988 root = ext3_iget(sb, EXT3_ROOT_INO); 1989 if (IS_ERR(root)) { 1990 ext3_msg(sb, KERN_ERR, "error: get root inode failed"); 1991 ret = PTR_ERR(root); 1992 goto failed_mount3; 1993 } 1994 if (!S_ISDIR(root->i_mode) || !root->i_blocks || !root->i_size) { 1995 iput(root); 1996 ext3_msg(sb, KERN_ERR, "error: corrupt root inode, run e2fsck"); 1997 goto failed_mount3; 1998 } 1999 sb->s_root = d_alloc_root(root); 2000 if (!sb->s_root) { 2001 ext3_msg(sb, KERN_ERR, "error: get root dentry failed"); 2002 iput(root); 2003 ret = -ENOMEM; 2004 goto failed_mount3; 2005 } 2006 2007 ext3_setup_super (sb, es, sb->s_flags & MS_RDONLY); 2008 2009 EXT3_SB(sb)->s_mount_state |= EXT3_ORPHAN_FS; 2010 ext3_orphan_cleanup(sb, es); 2011 EXT3_SB(sb)->s_mount_state &= ~EXT3_ORPHAN_FS; 2012 if (needs_recovery) 2013 ext3_msg(sb, KERN_INFO, "recovery complete"); 2014 ext3_mark_recovery_complete(sb, es); 2015 ext3_msg(sb, KERN_INFO, "mounted filesystem with %s data mode", 2016 test_opt(sb,DATA_FLAGS) == EXT3_MOUNT_JOURNAL_DATA ? "journal": 2017 test_opt(sb,DATA_FLAGS) == EXT3_MOUNT_ORDERED_DATA ? "ordered": 2018 "writeback"); 2019 2020 lock_kernel(); 2021 return 0; 2022 2023cantfind_ext3: 2024 if (!silent) 2025 ext3_msg(sb, KERN_INFO, 2026 "error: can't find ext3 filesystem on dev %s.", 2027 sb->s_id); 2028 goto failed_mount; 2029 2030failed_mount3: 2031 percpu_counter_destroy(&sbi->s_freeblocks_counter); 2032 percpu_counter_destroy(&sbi->s_freeinodes_counter); 2033 percpu_counter_destroy(&sbi->s_dirs_counter); 2034 journal_destroy(sbi->s_journal); 2035failed_mount2: 2036 for (i = 0; i < db_count; i++) 2037 brelse(sbi->s_group_desc[i]); 2038 kfree(sbi->s_group_desc); 2039failed_mount: 2040#ifdef CONFIG_QUOTA 2041 for (i = 0; i < MAXQUOTAS; i++) 2042 kfree(sbi->s_qf_names[i]); 2043#endif 2044 ext3_blkdev_remove(sbi); 2045 brelse(bh); 2046out_fail: 2047 sb->s_fs_info = NULL; 2048 kfree(sbi->s_blockgroup_lock); 2049 kfree(sbi); 2050 lock_kernel(); 2051 return ret; 2052} 2053 2054/* 2055 * Setup any per-fs journal parameters now. We'll do this both on 2056 * initial mount, once the journal has been initialised but before we've 2057 * done any recovery; and again on any subsequent remount. 2058 */ 2059static void ext3_init_journal_params(struct super_block *sb, journal_t *journal) 2060{ 2061 struct ext3_sb_info *sbi = EXT3_SB(sb); 2062 2063 if (sbi->s_commit_interval) 2064 journal->j_commit_interval = sbi->s_commit_interval; 2065 /* We could also set up an ext3-specific default for the commit 2066 * interval here, but for now we'll just fall back to the jbd 2067 * default. */ 2068 2069 spin_lock(&journal->j_state_lock); 2070 if (test_opt(sb, BARRIER)) 2071 journal->j_flags |= JFS_BARRIER; 2072 else 2073 journal->j_flags &= ~JFS_BARRIER; 2074 if (test_opt(sb, DATA_ERR_ABORT)) 2075 journal->j_flags |= JFS_ABORT_ON_SYNCDATA_ERR; 2076 else 2077 journal->j_flags &= ~JFS_ABORT_ON_SYNCDATA_ERR; 2078 spin_unlock(&journal->j_state_lock); 2079} 2080 2081static journal_t *ext3_get_journal(struct super_block *sb, 2082 unsigned int journal_inum) 2083{ 2084 struct inode *journal_inode; 2085 journal_t *journal; 2086 2087 /* First, test for the existence of a valid inode on disk. Bad 2088 * things happen if we iget() an unused inode, as the subsequent 2089 * iput() will try to delete it. */ 2090 2091 journal_inode = ext3_iget(sb, journal_inum); 2092 if (IS_ERR(journal_inode)) { 2093 ext3_msg(sb, KERN_ERR, "error: no journal found"); 2094 return NULL; 2095 } 2096 if (!journal_inode->i_nlink) { 2097 make_bad_inode(journal_inode); 2098 iput(journal_inode); 2099 ext3_msg(sb, KERN_ERR, "error: journal inode is deleted"); 2100 return NULL; 2101 } 2102 2103 jbd_debug(2, "Journal inode found at %p: %Ld bytes\n", 2104 journal_inode, journal_inode->i_size); 2105 if (!S_ISREG(journal_inode->i_mode)) { 2106 ext3_msg(sb, KERN_ERR, "error: invalid journal inode"); 2107 iput(journal_inode); 2108 return NULL; 2109 } 2110 2111 journal = journal_init_inode(journal_inode); 2112 if (!journal) { 2113 ext3_msg(sb, KERN_ERR, "error: could not load journal inode"); 2114 iput(journal_inode); 2115 return NULL; 2116 } 2117 journal->j_private = sb; 2118 ext3_init_journal_params(sb, journal); 2119 return journal; 2120} 2121 2122static journal_t *ext3_get_dev_journal(struct super_block *sb, 2123 dev_t j_dev) 2124{ 2125 struct buffer_head * bh; 2126 journal_t *journal; 2127 ext3_fsblk_t start; 2128 ext3_fsblk_t len; 2129 int hblock, blocksize; 2130 ext3_fsblk_t sb_block; 2131 unsigned long offset; 2132 struct ext3_super_block * es; 2133 struct block_device *bdev; 2134 2135 bdev = ext3_blkdev_get(j_dev, sb); 2136 if (bdev == NULL) 2137 return NULL; 2138 2139 if (bd_claim(bdev, sb)) { 2140 ext3_msg(sb, KERN_ERR, 2141 "error: failed to claim external journal device"); 2142 blkdev_put(bdev, FMODE_READ|FMODE_WRITE); 2143 return NULL; 2144 } 2145 2146 blocksize = sb->s_blocksize; 2147 hblock = bdev_logical_block_size(bdev); 2148 if (blocksize < hblock) { 2149 ext3_msg(sb, KERN_ERR, 2150 "error: blocksize too small for journal device"); 2151 goto out_bdev; 2152 } 2153 2154 sb_block = EXT3_MIN_BLOCK_SIZE / blocksize; 2155 offset = EXT3_MIN_BLOCK_SIZE % blocksize; 2156 set_blocksize(bdev, blocksize); 2157 if (!(bh = __bread(bdev, sb_block, blocksize))) { 2158 ext3_msg(sb, KERN_ERR, "error: couldn't read superblock of " 2159 "external journal"); 2160 goto out_bdev; 2161 } 2162 2163 es = (struct ext3_super_block *) (((char *)bh->b_data) + offset); 2164 if ((le16_to_cpu(es->s_magic) != EXT3_SUPER_MAGIC) || 2165 !(le32_to_cpu(es->s_feature_incompat) & 2166 EXT3_FEATURE_INCOMPAT_JOURNAL_DEV)) { 2167 ext3_msg(sb, KERN_ERR, "error: external journal has " 2168 "bad superblock"); 2169 brelse(bh); 2170 goto out_bdev; 2171 } 2172 2173 if (memcmp(EXT3_SB(sb)->s_es->s_journal_uuid, es->s_uuid, 16)) { 2174 ext3_msg(sb, KERN_ERR, "error: journal UUID does not match"); 2175 brelse(bh); 2176 goto out_bdev; 2177 } 2178 2179 len = le32_to_cpu(es->s_blocks_count); 2180 start = sb_block + 1; 2181 brelse(bh); /* we're done with the superblock */ 2182 2183 journal = journal_init_dev(bdev, sb->s_bdev, 2184 start, len, blocksize); 2185 if (!journal) { 2186 ext3_msg(sb, KERN_ERR, 2187 "error: failed to create device journal"); 2188 goto out_bdev; 2189 } 2190 journal->j_private = sb; 2191 ll_rw_block(READ, 1, &journal->j_sb_buffer); 2192 wait_on_buffer(journal->j_sb_buffer); 2193 if (!buffer_uptodate(journal->j_sb_buffer)) { 2194 ext3_msg(sb, KERN_ERR, "I/O error on journal device"); 2195 goto out_journal; 2196 } 2197 if (be32_to_cpu(journal->j_superblock->s_nr_users) != 1) { 2198 ext3_msg(sb, KERN_ERR, 2199 "error: external journal has more than one " 2200 "user (unsupported) - %d", 2201 be32_to_cpu(journal->j_superblock->s_nr_users)); 2202 goto out_journal; 2203 } 2204 EXT3_SB(sb)->journal_bdev = bdev; 2205 ext3_init_journal_params(sb, journal); 2206 return journal; 2207out_journal: 2208 journal_destroy(journal); 2209out_bdev: 2210 ext3_blkdev_put(bdev); 2211 return NULL; 2212} 2213 2214static int ext3_load_journal(struct super_block *sb, 2215 struct ext3_super_block *es, 2216 unsigned long journal_devnum) 2217{ 2218 journal_t *journal; 2219 unsigned int journal_inum = le32_to_cpu(es->s_journal_inum); 2220 dev_t journal_dev; 2221 int err = 0; 2222 int really_read_only; 2223 2224 if (journal_devnum && 2225 journal_devnum != le32_to_cpu(es->s_journal_dev)) { 2226 ext3_msg(sb, KERN_INFO, "external journal device major/minor " 2227 "numbers have changed"); 2228 journal_dev = new_decode_dev(journal_devnum); 2229 } else 2230 journal_dev = new_decode_dev(le32_to_cpu(es->s_journal_dev)); 2231 2232 really_read_only = bdev_read_only(sb->s_bdev); 2233 2234 /* 2235 * Are we loading a blank journal or performing recovery after a 2236 * crash? For recovery, we need to check in advance whether we 2237 * can get read-write access to the device. 2238 */ 2239 2240 if (EXT3_HAS_INCOMPAT_FEATURE(sb, EXT3_FEATURE_INCOMPAT_RECOVER)) { 2241 if (sb->s_flags & MS_RDONLY) { 2242 ext3_msg(sb, KERN_INFO, 2243 "recovery required on readonly filesystem"); 2244 if (really_read_only) { 2245 ext3_msg(sb, KERN_ERR, "error: write access " 2246 "unavailable, cannot proceed"); 2247 return -EROFS; 2248 } 2249 ext3_msg(sb, KERN_INFO, 2250 "write access will be enabled during recovery"); 2251 } 2252 } 2253 2254 if (journal_inum && journal_dev) { 2255 ext3_msg(sb, KERN_ERR, "error: filesystem has both journal " 2256 "and inode journals"); 2257 return -EINVAL; 2258 } 2259 2260 if (journal_inum) { 2261 if (!(journal = ext3_get_journal(sb, journal_inum))) 2262 return -EINVAL; 2263 } else { 2264 if (!(journal = ext3_get_dev_journal(sb, journal_dev))) 2265 return -EINVAL; 2266 } 2267 2268 if (!(journal->j_flags & JFS_BARRIER)) 2269 printk(KERN_INFO "EXT3-fs: barriers not enabled\n"); 2270 2271 if (!really_read_only && test_opt(sb, UPDATE_JOURNAL)) { 2272 err = journal_update_format(journal); 2273 if (err) { 2274 ext3_msg(sb, KERN_ERR, "error updating journal"); 2275 journal_destroy(journal); 2276 return err; 2277 } 2278 } 2279 2280 if (!EXT3_HAS_INCOMPAT_FEATURE(sb, EXT3_FEATURE_INCOMPAT_RECOVER)) 2281 err = journal_wipe(journal, !really_read_only); 2282 if (!err) 2283 err = journal_load(journal); 2284 2285 if (err) { 2286 ext3_msg(sb, KERN_ERR, "error loading journal"); 2287 journal_destroy(journal); 2288 return err; 2289 } 2290 2291 EXT3_SB(sb)->s_journal = journal; 2292 ext3_clear_journal_err(sb, es); 2293 2294 if (journal_devnum && 2295 journal_devnum != le32_to_cpu(es->s_journal_dev)) { 2296 es->s_journal_dev = cpu_to_le32(journal_devnum); 2297 2298 /* Make sure we flush the recovery flag to disk. */ 2299 ext3_commit_super(sb, es, 1); 2300 } 2301 2302 return 0; 2303} 2304 2305static int ext3_create_journal(struct super_block *sb, 2306 struct ext3_super_block *es, 2307 unsigned int journal_inum) 2308{ 2309 journal_t *journal; 2310 int err; 2311 2312 if (sb->s_flags & MS_RDONLY) { 2313 ext3_msg(sb, KERN_ERR, 2314 "error: readonly filesystem when trying to " 2315 "create journal"); 2316 return -EROFS; 2317 } 2318 2319 journal = ext3_get_journal(sb, journal_inum); 2320 if (!journal) 2321 return -EINVAL; 2322 2323 ext3_msg(sb, KERN_INFO, "creating new journal on inode %u", 2324 journal_inum); 2325 2326 err = journal_create(journal); 2327 if (err) { 2328 ext3_msg(sb, KERN_ERR, "error creating journal"); 2329 journal_destroy(journal); 2330 return -EIO; 2331 } 2332 2333 EXT3_SB(sb)->s_journal = journal; 2334 2335 ext3_update_dynamic_rev(sb); 2336 EXT3_SET_INCOMPAT_FEATURE(sb, EXT3_FEATURE_INCOMPAT_RECOVER); 2337 EXT3_SET_COMPAT_FEATURE(sb, EXT3_FEATURE_COMPAT_HAS_JOURNAL); 2338 2339 es->s_journal_inum = cpu_to_le32(journal_inum); 2340 2341 /* Make sure we flush the recovery flag to disk. */ 2342 ext3_commit_super(sb, es, 1); 2343 2344 return 0; 2345} 2346 2347static int ext3_commit_super(struct super_block *sb, 2348 struct ext3_super_block *es, 2349 int sync) 2350{ 2351 struct buffer_head *sbh = EXT3_SB(sb)->s_sbh; 2352 int error = 0; 2353 2354 if (!sbh) 2355 return error; 2356 /* 2357 * If the file system is mounted read-only, don't update the 2358 * superblock write time. This avoids updating the superblock 2359 * write time when we are mounting the root file system 2360 * read/only but we need to replay the journal; at that point, 2361 * for people who are east of GMT and who make their clock 2362 * tick in localtime for Windows bug-for-bug compatibility, 2363 * the clock is set in the future, and this will cause e2fsck 2364 * to complain and force a full file system check. 2365 */ 2366 if (!(sb->s_flags & MS_RDONLY)) 2367 es->s_wtime = cpu_to_le32(get_seconds()); 2368 es->s_free_blocks_count = cpu_to_le32(ext3_count_free_blocks(sb)); 2369 es->s_free_inodes_count = cpu_to_le32(ext3_count_free_inodes(sb)); 2370 BUFFER_TRACE(sbh, "marking dirty"); 2371 mark_buffer_dirty(sbh); 2372 if (sync) 2373 error = sync_dirty_buffer(sbh); 2374 return error; 2375} 2376 2377 2378/* 2379 * Have we just finished recovery? If so, and if we are mounting (or 2380 * remounting) the filesystem readonly, then we will end up with a 2381 * consistent fs on disk. Record that fact. 2382 */ 2383static void ext3_mark_recovery_complete(struct super_block * sb, 2384 struct ext3_super_block * es) 2385{ 2386 journal_t *journal = EXT3_SB(sb)->s_journal; 2387 2388 journal_lock_updates(journal); 2389 if (journal_flush(journal) < 0) 2390 goto out; 2391 2392 if (EXT3_HAS_INCOMPAT_FEATURE(sb, EXT3_FEATURE_INCOMPAT_RECOVER) && 2393 sb->s_flags & MS_RDONLY) { 2394 EXT3_CLEAR_INCOMPAT_FEATURE(sb, EXT3_FEATURE_INCOMPAT_RECOVER); 2395 ext3_commit_super(sb, es, 1); 2396 } 2397 2398out: 2399 journal_unlock_updates(journal); 2400} 2401 2402/* 2403 * If we are mounting (or read-write remounting) a filesystem whose journal 2404 * has recorded an error from a previous lifetime, move that error to the 2405 * main filesystem now. 2406 */ 2407static void ext3_clear_journal_err(struct super_block *sb, 2408 struct ext3_super_block *es) 2409{ 2410 journal_t *journal; 2411 int j_errno; 2412 const char *errstr; 2413 2414 journal = EXT3_SB(sb)->s_journal; 2415 2416 /* 2417 * Now check for any error status which may have been recorded in the 2418 * journal by a prior ext3_error() or ext3_abort() 2419 */ 2420 2421 j_errno = journal_errno(journal); 2422 if (j_errno) { 2423 char nbuf[16]; 2424 2425 errstr = ext3_decode_error(sb, j_errno, nbuf); 2426 ext3_warning(sb, __func__, "Filesystem error recorded " 2427 "from previous mount: %s", errstr); 2428 ext3_warning(sb, __func__, "Marking fs in need of " 2429 "filesystem check."); 2430 2431 EXT3_SB(sb)->s_mount_state |= EXT3_ERROR_FS; 2432 es->s_state |= cpu_to_le16(EXT3_ERROR_FS); 2433 ext3_commit_super (sb, es, 1); 2434 2435 journal_clear_err(journal); 2436 } 2437} 2438 2439/* 2440 * Force the running and committing transactions to commit, 2441 * and wait on the commit. 2442 */ 2443int ext3_force_commit(struct super_block *sb) 2444{ 2445 journal_t *journal; 2446 int ret; 2447 2448 if (sb->s_flags & MS_RDONLY) 2449 return 0; 2450 2451 journal = EXT3_SB(sb)->s_journal; 2452 ret = ext3_journal_force_commit(journal); 2453 return ret; 2454} 2455 2456static int ext3_sync_fs(struct super_block *sb, int wait) 2457{ 2458 tid_t target; 2459 2460 if (journal_start_commit(EXT3_SB(sb)->s_journal, &target)) { 2461 if (wait) 2462 log_wait_commit(EXT3_SB(sb)->s_journal, target); 2463 } 2464 return 0; 2465} 2466 2467/* 2468 * LVM calls this function before a (read-only) snapshot is created. This 2469 * gives us a chance to flush the journal completely and mark the fs clean. 2470 */ 2471static int ext3_freeze(struct super_block *sb) 2472{ 2473 int error = 0; 2474 journal_t *journal; 2475 2476 if (!(sb->s_flags & MS_RDONLY)) { 2477 journal = EXT3_SB(sb)->s_journal; 2478 2479 /* Now we set up the journal barrier. */ 2480 journal_lock_updates(journal); 2481 2482 /* 2483 * We don't want to clear needs_recovery flag when we failed 2484 * to flush the journal. 2485 */ 2486 error = journal_flush(journal); 2487 if (error < 0) 2488 goto out; 2489 2490 /* Journal blocked and flushed, clear needs_recovery flag. */ 2491 EXT3_CLEAR_INCOMPAT_FEATURE(sb, EXT3_FEATURE_INCOMPAT_RECOVER); 2492 error = ext3_commit_super(sb, EXT3_SB(sb)->s_es, 1); 2493 if (error) 2494 goto out; 2495 } 2496 return 0; 2497 2498out: 2499 journal_unlock_updates(journal); 2500 return error; 2501} 2502 2503/* 2504 * Called by LVM after the snapshot is done. We need to reset the RECOVER 2505 * flag here, even though the filesystem is not technically dirty yet. 2506 */ 2507static int ext3_unfreeze(struct super_block *sb) 2508{ 2509 if (!(sb->s_flags & MS_RDONLY)) { 2510 lock_super(sb); 2511 /* Reser the needs_recovery flag before the fs is unlocked. */ 2512 EXT3_SET_INCOMPAT_FEATURE(sb, EXT3_FEATURE_INCOMPAT_RECOVER); 2513 ext3_commit_super(sb, EXT3_SB(sb)->s_es, 1); 2514 unlock_super(sb); 2515 journal_unlock_updates(EXT3_SB(sb)->s_journal); 2516 } 2517 return 0; 2518} 2519 2520static int ext3_remount (struct super_block * sb, int * flags, char * data) 2521{ 2522 struct ext3_super_block * es; 2523 struct ext3_sb_info *sbi = EXT3_SB(sb); 2524 ext3_fsblk_t n_blocks_count = 0; 2525 unsigned long old_sb_flags; 2526 struct ext3_mount_options old_opts; 2527 int enable_quota = 0; 2528 int err; 2529#ifdef CONFIG_QUOTA 2530 int i; 2531#endif 2532 2533 lock_kernel(); 2534 2535 /* Store the original options */ 2536 lock_super(sb); 2537 old_sb_flags = sb->s_flags; 2538 old_opts.s_mount_opt = sbi->s_mount_opt; 2539 old_opts.s_resuid = sbi->s_resuid; 2540 old_opts.s_resgid = sbi->s_resgid; 2541 old_opts.s_commit_interval = sbi->s_commit_interval; 2542#ifdef CONFIG_QUOTA 2543 old_opts.s_jquota_fmt = sbi->s_jquota_fmt; 2544 for (i = 0; i < MAXQUOTAS; i++) 2545 old_opts.s_qf_names[i] = sbi->s_qf_names[i]; 2546#endif 2547 2548 /* 2549 * Allow the "check" option to be passed as a remount option. 2550 */ 2551 if (!parse_options(data, sb, NULL, NULL, &n_blocks_count, 1)) { 2552 err = -EINVAL; 2553 goto restore_opts; 2554 } 2555 2556 if (test_opt(sb, ABORT)) 2557 ext3_abort(sb, __func__, "Abort forced by user"); 2558 2559 sb->s_flags = (sb->s_flags & ~MS_POSIXACL) | 2560 (test_opt(sb, POSIX_ACL) ? MS_POSIXACL : 0); 2561 2562 es = sbi->s_es; 2563 2564 ext3_init_journal_params(sb, sbi->s_journal); 2565 2566 if ((*flags & MS_RDONLY) != (sb->s_flags & MS_RDONLY) || 2567 n_blocks_count > le32_to_cpu(es->s_blocks_count)) { 2568 if (test_opt(sb, ABORT)) { 2569 err = -EROFS; 2570 goto restore_opts; 2571 } 2572 2573 if (*flags & MS_RDONLY) { 2574 err = dquot_suspend(sb, -1); 2575 if (err < 0) 2576 goto restore_opts; 2577 2578 /* 2579 * First of all, the unconditional stuff we have to do 2580 * to disable replay of the journal when we next remount 2581 */ 2582 sb->s_flags |= MS_RDONLY; 2583 2584 /* 2585 * OK, test if we are remounting a valid rw partition 2586 * readonly, and if so set the rdonly flag and then 2587 * mark the partition as valid again. 2588 */ 2589 if (!(es->s_state & cpu_to_le16(EXT3_VALID_FS)) && 2590 (sbi->s_mount_state & EXT3_VALID_FS)) 2591 es->s_state = cpu_to_le16(sbi->s_mount_state); 2592 2593 ext3_mark_recovery_complete(sb, es); 2594 } else { 2595 __le32 ret; 2596 if ((ret = EXT3_HAS_RO_COMPAT_FEATURE(sb, 2597 ~EXT3_FEATURE_RO_COMPAT_SUPP))) { 2598 ext3_msg(sb, KERN_WARNING, 2599 "warning: couldn't remount RDWR " 2600 "because of unsupported optional " 2601 "features (%x)", le32_to_cpu(ret)); 2602 err = -EROFS; 2603 goto restore_opts; 2604 } 2605 2606 /* 2607 * If we have an unprocessed orphan list hanging 2608 * around from a previously readonly bdev mount, 2609 * require a full umount/remount for now. 2610 */ 2611 if (es->s_last_orphan) { 2612 ext3_msg(sb, KERN_WARNING, "warning: couldn't " 2613 "remount RDWR because of unprocessed " 2614 "orphan inode list. Please " 2615 "umount/remount instead."); 2616 err = -EINVAL; 2617 goto restore_opts; 2618 } 2619 2620 /* 2621 * Mounting a RDONLY partition read-write, so reread 2622 * and store the current valid flag. (It may have 2623 * been changed by e2fsck since we originally mounted 2624 * the partition.) 2625 */ 2626 ext3_clear_journal_err(sb, es); 2627 sbi->s_mount_state = le16_to_cpu(es->s_state); 2628 if ((err = ext3_group_extend(sb, es, n_blocks_count))) 2629 goto restore_opts; 2630 if (!ext3_setup_super (sb, es, 0)) 2631 sb->s_flags &= ~MS_RDONLY; 2632 enable_quota = 1; 2633 } 2634 } 2635#ifdef CONFIG_QUOTA 2636 /* Release old quota file names */ 2637 for (i = 0; i < MAXQUOTAS; i++) 2638 if (old_opts.s_qf_names[i] && 2639 old_opts.s_qf_names[i] != sbi->s_qf_names[i]) 2640 kfree(old_opts.s_qf_names[i]); 2641#endif 2642 unlock_super(sb); 2643 unlock_kernel(); 2644 2645 if (enable_quota) 2646 dquot_resume(sb, -1); 2647 return 0; 2648restore_opts: 2649 sb->s_flags = old_sb_flags; 2650 sbi->s_mount_opt = old_opts.s_mount_opt; 2651 sbi->s_resuid = old_opts.s_resuid; 2652 sbi->s_resgid = old_opts.s_resgid; 2653 sbi->s_commit_interval = old_opts.s_commit_interval; 2654#ifdef CONFIG_QUOTA 2655 sbi->s_jquota_fmt = old_opts.s_jquota_fmt; 2656 for (i = 0; i < MAXQUOTAS; i++) { 2657 if (sbi->s_qf_names[i] && 2658 old_opts.s_qf_names[i] != sbi->s_qf_names[i]) 2659 kfree(sbi->s_qf_names[i]); 2660 sbi->s_qf_names[i] = old_opts.s_qf_names[i]; 2661 } 2662#endif 2663 unlock_super(sb); 2664 unlock_kernel(); 2665 return err; 2666} 2667 2668static int ext3_statfs (struct dentry * dentry, struct kstatfs * buf) 2669{ 2670 struct super_block *sb = dentry->d_sb; 2671 struct ext3_sb_info *sbi = EXT3_SB(sb); 2672 struct ext3_super_block *es = sbi->s_es; 2673 u64 fsid; 2674 2675 if (test_opt(sb, MINIX_DF)) { 2676 sbi->s_overhead_last = 0; 2677 } else if (sbi->s_blocks_last != le32_to_cpu(es->s_blocks_count)) { 2678 unsigned long ngroups = sbi->s_groups_count, i; 2679 ext3_fsblk_t overhead = 0; 2680 smp_rmb(); 2681 2682 /* 2683 * Compute the overhead (FS structures). This is constant 2684 * for a given filesystem unless the number of block groups 2685 * changes so we cache the previous value until it does. 2686 */ 2687 2688 /* 2689 * All of the blocks before first_data_block are 2690 * overhead 2691 */ 2692 overhead = le32_to_cpu(es->s_first_data_block); 2693 2694 /* 2695 * Add the overhead attributed to the superblock and 2696 * block group descriptors. If the sparse superblocks 2697 * feature is turned on, then not all groups have this. 2698 */ 2699 for (i = 0; i < ngroups; i++) { 2700 overhead += ext3_bg_has_super(sb, i) + 2701 ext3_bg_num_gdb(sb, i); 2702 cond_resched(); 2703 } 2704 2705 /* 2706 * Every block group has an inode bitmap, a block 2707 * bitmap, and an inode table. 2708 */ 2709 overhead += ngroups * (2 + sbi->s_itb_per_group); 2710 sbi->s_overhead_last = overhead; 2711 smp_wmb(); 2712 sbi->s_blocks_last = le32_to_cpu(es->s_blocks_count); 2713 } 2714 2715 buf->f_type = EXT3_SUPER_MAGIC; 2716 buf->f_bsize = sb->s_blocksize; 2717 buf->f_blocks = le32_to_cpu(es->s_blocks_count) - sbi->s_overhead_last; 2718 buf->f_bfree = percpu_counter_sum_positive(&sbi->s_freeblocks_counter); 2719 buf->f_bavail = buf->f_bfree - le32_to_cpu(es->s_r_blocks_count); 2720 if (buf->f_bfree < le32_to_cpu(es->s_r_blocks_count)) 2721 buf->f_bavail = 0; 2722 buf->f_files = le32_to_cpu(es->s_inodes_count); 2723 buf->f_ffree = percpu_counter_sum_positive(&sbi->s_freeinodes_counter); 2724 buf->f_namelen = EXT3_NAME_LEN; 2725 fsid = le64_to_cpup((void *)es->s_uuid) ^ 2726 le64_to_cpup((void *)es->s_uuid + sizeof(u64)); 2727 buf->f_fsid.val[0] = fsid & 0xFFFFFFFFUL; 2728 buf->f_fsid.val[1] = (fsid >> 32) & 0xFFFFFFFFUL; 2729 return 0; 2730} 2731 2732/* Helper function for writing quotas on sync - we need to start transaction before quota file 2733 * is locked for write. Otherwise the are possible deadlocks: 2734 * Process 1 Process 2 2735 * ext3_create() quota_sync() 2736 * journal_start() write_dquot() 2737 * dquot_initialize() down(dqio_mutex) 2738 * down(dqio_mutex) journal_start() 2739 * 2740 */ 2741 2742#ifdef CONFIG_QUOTA 2743 2744static inline struct inode *dquot_to_inode(struct dquot *dquot) 2745{ 2746 return sb_dqopt(dquot->dq_sb)->files[dquot->dq_type]; 2747} 2748 2749static int ext3_write_dquot(struct dquot *dquot) 2750{ 2751 int ret, err; 2752 handle_t *handle; 2753 struct inode *inode; 2754 2755 inode = dquot_to_inode(dquot); 2756 handle = ext3_journal_start(inode, 2757 EXT3_QUOTA_TRANS_BLOCKS(dquot->dq_sb)); 2758 if (IS_ERR(handle)) 2759 return PTR_ERR(handle); 2760 ret = dquot_commit(dquot); 2761 err = ext3_journal_stop(handle); 2762 if (!ret) 2763 ret = err; 2764 return ret; 2765} 2766 2767static int ext3_acquire_dquot(struct dquot *dquot) 2768{ 2769 int ret, err; 2770 handle_t *handle; 2771 2772 handle = ext3_journal_start(dquot_to_inode(dquot), 2773 EXT3_QUOTA_INIT_BLOCKS(dquot->dq_sb)); 2774 if (IS_ERR(handle)) 2775 return PTR_ERR(handle); 2776 ret = dquot_acquire(dquot); 2777 err = ext3_journal_stop(handle); 2778 if (!ret) 2779 ret = err; 2780 return ret; 2781} 2782 2783static int ext3_release_dquot(struct dquot *dquot) 2784{ 2785 int ret, err; 2786 handle_t *handle; 2787 2788 handle = ext3_journal_start(dquot_to_inode(dquot), 2789 EXT3_QUOTA_DEL_BLOCKS(dquot->dq_sb)); 2790 if (IS_ERR(handle)) { 2791 /* Release dquot anyway to avoid endless cycle in dqput() */ 2792 dquot_release(dquot); 2793 return PTR_ERR(handle); 2794 } 2795 ret = dquot_release(dquot); 2796 err = ext3_journal_stop(handle); 2797 if (!ret) 2798 ret = err; 2799 return ret; 2800} 2801 2802static int ext3_mark_dquot_dirty(struct dquot *dquot) 2803{ 2804 /* Are we journaling quotas? */ 2805 if (EXT3_SB(dquot->dq_sb)->s_qf_names[USRQUOTA] || 2806 EXT3_SB(dquot->dq_sb)->s_qf_names[GRPQUOTA]) { 2807 dquot_mark_dquot_dirty(dquot); 2808 return ext3_write_dquot(dquot); 2809 } else { 2810 return dquot_mark_dquot_dirty(dquot); 2811 } 2812} 2813 2814static int ext3_write_info(struct super_block *sb, int type) 2815{ 2816 int ret, err; 2817 handle_t *handle; 2818 2819 /* Data block + inode block */ 2820 handle = ext3_journal_start(sb->s_root->d_inode, 2); 2821 if (IS_ERR(handle)) 2822 return PTR_ERR(handle); 2823 ret = dquot_commit_info(sb, type); 2824 err = ext3_journal_stop(handle); 2825 if (!ret) 2826 ret = err; 2827 return ret; 2828} 2829 2830/* 2831 * Turn on quotas during mount time - we need to find 2832 * the quota file and such... 2833 */ 2834static int ext3_quota_on_mount(struct super_block *sb, int type) 2835{ 2836 return dquot_quota_on_mount(sb, EXT3_SB(sb)->s_qf_names[type], 2837 EXT3_SB(sb)->s_jquota_fmt, type); 2838} 2839 2840/* 2841 * Standard function to be called on quota_on 2842 */ 2843static int ext3_quota_on(struct super_block *sb, int type, int format_id, 2844 char *name) 2845{ 2846 int err; 2847 struct path path; 2848 2849 if (!test_opt(sb, QUOTA)) 2850 return -EINVAL; 2851 2852 err = kern_path(name, LOOKUP_FOLLOW, &path); 2853 if (err) 2854 return err; 2855 2856 /* Quotafile not on the same filesystem? */ 2857 if (path.mnt->mnt_sb != sb) { 2858 path_put(&path); 2859 return -EXDEV; 2860 } 2861 /* Journaling quota? */ 2862 if (EXT3_SB(sb)->s_qf_names[type]) { 2863 /* Quotafile not of fs root? */ 2864 if (path.dentry->d_parent != sb->s_root) 2865 ext3_msg(sb, KERN_WARNING, 2866 "warning: Quota file not on filesystem root. " 2867 "Journaled quota will not work."); 2868 } 2869 2870 /* 2871 * When we journal data on quota file, we have to flush journal to see 2872 * all updates to the file when we bypass pagecache... 2873 */ 2874 if (ext3_should_journal_data(path.dentry->d_inode)) { 2875 /* 2876 * We don't need to lock updates but journal_flush() could 2877 * otherwise be livelocked... 2878 */ 2879 journal_lock_updates(EXT3_SB(sb)->s_journal); 2880 err = journal_flush(EXT3_SB(sb)->s_journal); 2881 journal_unlock_updates(EXT3_SB(sb)->s_journal); 2882 if (err) { 2883 path_put(&path); 2884 return err; 2885 } 2886 } 2887 2888 err = dquot_quota_on_path(sb, type, format_id, &path); 2889 path_put(&path); 2890 return err; 2891} 2892 2893/* Read data from quotafile - avoid pagecache and such because we cannot afford 2894 * acquiring the locks... As quota files are never truncated and quota code 2895 * itself serializes the operations (and noone else should touch the files) 2896 * we don't have to be afraid of races */ 2897static ssize_t ext3_quota_read(struct super_block *sb, int type, char *data, 2898 size_t len, loff_t off) 2899{ 2900 struct inode *inode = sb_dqopt(sb)->files[type]; 2901 sector_t blk = off >> EXT3_BLOCK_SIZE_BITS(sb); 2902 int err = 0; 2903 int offset = off & (sb->s_blocksize - 1); 2904 int tocopy; 2905 size_t toread; 2906 struct buffer_head *bh; 2907 loff_t i_size = i_size_read(inode); 2908 2909 if (off > i_size) 2910 return 0; 2911 if (off+len > i_size) 2912 len = i_size-off; 2913 toread = len; 2914 while (toread > 0) { 2915 tocopy = sb->s_blocksize - offset < toread ? 2916 sb->s_blocksize - offset : toread; 2917 bh = ext3_bread(NULL, inode, blk, 0, &err); 2918 if (err) 2919 return err; 2920 if (!bh) /* A hole? */ 2921 memset(data, 0, tocopy); 2922 else 2923 memcpy(data, bh->b_data+offset, tocopy); 2924 brelse(bh); 2925 offset = 0; 2926 toread -= tocopy; 2927 data += tocopy; 2928 blk++; 2929 } 2930 return len; 2931} 2932 2933/* Write to quotafile (we know the transaction is already started and has 2934 * enough credits) */ 2935static ssize_t ext3_quota_write(struct super_block *sb, int type, 2936 const char *data, size_t len, loff_t off) 2937{ 2938 struct inode *inode = sb_dqopt(sb)->files[type]; 2939 sector_t blk = off >> EXT3_BLOCK_SIZE_BITS(sb); 2940 int err = 0; 2941 int offset = off & (sb->s_blocksize - 1); 2942 int journal_quota = EXT3_SB(sb)->s_qf_names[type] != NULL; 2943 struct buffer_head *bh; 2944 handle_t *handle = journal_current_handle(); 2945 2946 if (!handle) { 2947 ext3_msg(sb, KERN_WARNING, 2948 "warning: quota write (off=%llu, len=%llu)" 2949 " cancelled because transaction is not started.", 2950 (unsigned long long)off, (unsigned long long)len); 2951 return -EIO; 2952 } 2953 2954 /* 2955 * Since we account only one data block in transaction credits, 2956 * then it is impossible to cross a block boundary. 2957 */ 2958 if (sb->s_blocksize - offset < len) { 2959 ext3_msg(sb, KERN_WARNING, "Quota write (off=%llu, len=%llu)" 2960 " cancelled because not block aligned", 2961 (unsigned long long)off, (unsigned long long)len); 2962 return -EIO; 2963 } 2964 mutex_lock_nested(&inode->i_mutex, I_MUTEX_QUOTA); 2965 bh = ext3_bread(handle, inode, blk, 1, &err); 2966 if (!bh) 2967 goto out; 2968 if (journal_quota) { 2969 err = ext3_journal_get_write_access(handle, bh); 2970 if (err) { 2971 brelse(bh); 2972 goto out; 2973 } 2974 } 2975 lock_buffer(bh); 2976 memcpy(bh->b_data+offset, data, len); 2977 flush_dcache_page(bh->b_page); 2978 unlock_buffer(bh); 2979 if (journal_quota) 2980 err = ext3_journal_dirty_metadata(handle, bh); 2981 else { 2982 /* Always do at least ordered writes for quotas */ 2983 err = ext3_journal_dirty_data(handle, bh); 2984 mark_buffer_dirty(bh); 2985 } 2986 brelse(bh); 2987out: 2988 if (err) { 2989 mutex_unlock(&inode->i_mutex); 2990 return err; 2991 } 2992 if (inode->i_size < off + len) { 2993 i_size_write(inode, off + len); 2994 EXT3_I(inode)->i_disksize = inode->i_size; 2995 } 2996 inode->i_version++; 2997 inode->i_mtime = inode->i_ctime = CURRENT_TIME; 2998 ext3_mark_inode_dirty(handle, inode); 2999 mutex_unlock(&inode->i_mutex); 3000 return len; 3001} 3002 3003#endif 3004 3005static int ext3_get_sb(struct file_system_type *fs_type, 3006 int flags, const char *dev_name, void *data, struct vfsmount *mnt) 3007{ 3008 return get_sb_bdev(fs_type, flags, dev_name, data, ext3_fill_super, mnt); 3009} 3010 3011static struct file_system_type ext3_fs_type = { 3012 .owner = THIS_MODULE, 3013 .name = "ext3", 3014 .get_sb = ext3_get_sb, 3015 .kill_sb = kill_block_super, 3016 .fs_flags = FS_REQUIRES_DEV, 3017}; 3018 3019static int __init init_ext3_fs(void) 3020{ 3021 int err = init_ext3_xattr(); 3022 if (err) 3023 return err; 3024 err = init_inodecache(); 3025 if (err) 3026 goto out1; 3027 err = register_filesystem(&ext3_fs_type); 3028 if (err) 3029 goto out; 3030 return 0; 3031out: 3032 destroy_inodecache(); 3033out1: 3034 exit_ext3_xattr(); 3035 return err; 3036} 3037 3038static void __exit exit_ext3_fs(void) 3039{ 3040 unregister_filesystem(&ext3_fs_type); 3041 destroy_inodecache(); 3042 exit_ext3_xattr(); 3043} 3044 3045MODULE_AUTHOR("Remy Card, Stephen Tweedie, Andrew Morton, Andreas Dilger, Theodore Ts'o and others"); 3046MODULE_DESCRIPTION("Second Extended Filesystem with journaling extensions"); 3047MODULE_LICENSE("GPL"); 3048module_init(init_ext3_fs) 3049module_exit(exit_ext3_fs) 3050