1/* 2 * linux/fs/ext3/super.c 3 * 4 * Copyright (C) 1992, 1993, 1994, 1995 5 * Remy Card (card@masi.ibp.fr) 6 * Laboratoire MASI - Institut Blaise Pascal 7 * Universite Pierre et Marie Curie (Paris VI) 8 * 9 * from 10 * 11 * linux/fs/minix/inode.c 12 * 13 * Copyright (C) 1991, 1992 Linus Torvalds 14 * 15 * Big-endian to little-endian byte-swapping/bitmaps by 16 * David S. Miller (davem@caip.rutgers.edu), 1995 17 */ 18 19#include <linux/config.h> 20#include <linux/module.h> 21#include <linux/string.h> 22#include <linux/fs.h> 23#include <linux/sched.h> 24#include <linux/jbd.h> 25#include <linux/ext3_fs.h> 26#include <linux/ext3_jbd.h> 27#include <linux/slab.h> 28#include <linux/init.h> 29#include <linux/locks.h> 30#include <linux/blkdev.h> 31#include <linux/smp_lock.h> 32#include <linux/random.h> 33#include <asm/uaccess.h> 34 35#ifdef CONFIG_JBD_DEBUG 36static int ext3_ro_after; /* Make fs read-only after this many jiffies */ 37#endif 38 39static int ext3_load_journal(struct super_block *, struct ext3_super_block *); 40static int ext3_create_journal(struct super_block *, struct ext3_super_block *, 41 int); 42static void ext3_commit_super (struct super_block * sb, 43 struct ext3_super_block * es, 44 int sync); 45static void ext3_mark_recovery_complete(struct super_block * sb, 46 struct ext3_super_block * es); 47static void ext3_clear_journal_err(struct super_block * sb, 48 struct ext3_super_block * es); 49 50#ifdef CONFIG_JBD_DEBUG 51int journal_no_write[2]; 52 53/* 54 * Debug code for turning filesystems "read-only" after a specified 55 * amount of time. This is for crash/recovery testing. 56 */ 57 58static void make_rdonly(kdev_t dev, int *no_write) 59{ 60 if (dev) { 61 printk(KERN_WARNING "Turning device %s read-only\n", 62 bdevname(dev)); 63 *no_write = 0xdead0000 + dev; 64 } 65} 66 67static void turn_fs_readonly(unsigned long arg) 68{ 69 struct super_block *sb = (struct super_block *)arg; 70 71 make_rdonly(sb->s_dev, &journal_no_write[0]); 72 make_rdonly(EXT3_SB(sb)->s_journal->j_dev, &journal_no_write[1]); 73 wake_up(&EXT3_SB(sb)->ro_wait_queue); 74} 75 76static void setup_ro_after(struct super_block *sb) 77{ 78 struct ext3_sb_info *sbi = EXT3_SB(sb); 79 init_timer(&sbi->turn_ro_timer); 80 if (ext3_ro_after) { 81 printk(KERN_DEBUG "fs will go read-only in %d jiffies\n", 82 ext3_ro_after); 83 init_waitqueue_head(&sbi->ro_wait_queue); 84 journal_no_write[0] = 0; 85 journal_no_write[1] = 0; 86 sbi->turn_ro_timer.function = turn_fs_readonly; 87 sbi->turn_ro_timer.data = (unsigned long)sb; 88 sbi->turn_ro_timer.expires = jiffies + ext3_ro_after; 89 ext3_ro_after = 0; 90 add_timer(&sbi->turn_ro_timer); 91 } 92} 93 94static void clear_ro_after(struct super_block *sb) 95{ 96 del_timer_sync(&EXT3_SB(sb)->turn_ro_timer); 97 journal_no_write[0] = 0; 98 journal_no_write[1] = 0; 99 ext3_ro_after = 0; 100} 101#else 102#define setup_ro_after(sb) do {} while (0) 103#define clear_ro_after(sb) do {} while (0) 104#endif 105 106 107static char error_buf[1024]; 108 109/* Determine the appropriate response to ext3_error on a given filesystem */ 110 111static int ext3_error_behaviour(struct super_block *sb) 112{ 113 /* First check for mount-time options */ 114 if (test_opt (sb, ERRORS_PANIC)) 115 return EXT3_ERRORS_PANIC; 116 if (test_opt (sb, ERRORS_RO)) 117 return EXT3_ERRORS_RO; 118 if (test_opt (sb, ERRORS_CONT)) 119 return EXT3_ERRORS_CONTINUE; 120 121 /* If no overrides were specified on the mount, then fall back 122 * to the default behaviour set in the filesystem's superblock 123 * on disk. */ 124 switch (le16_to_cpu(sb->u.ext3_sb.s_es->s_errors)) { 125 case EXT3_ERRORS_PANIC: 126 return EXT3_ERRORS_PANIC; 127 case EXT3_ERRORS_RO: 128 return EXT3_ERRORS_RO; 129 default: 130 break; 131 } 132 return EXT3_ERRORS_CONTINUE; 133} 134 135/* Deal with the reporting of failure conditions on a filesystem such as 136 * inconsistencies detected or read IO failures. 137 * 138 * On ext2, we can store the error state of the filesystem in the 139 * superblock. That is not possible on ext3, because we may have other 140 * write ordering constraints on the superblock which prevent us from 141 * writing it out straight away; and given that the journal is about to 142 * be aborted, we can't rely on the current, or future, transactions to 143 * write out the superblock safely. 144 * 145 * We'll just use the journal_abort() error code to record an error in 146 * the journal instead. On recovery, the journal will compain about 147 * that error until we've noted it down and cleared it. 148 */ 149 150static void ext3_handle_error(struct super_block *sb) 151{ 152 struct ext3_super_block *es = EXT3_SB(sb)->s_es; 153 154 EXT3_SB(sb)->s_mount_state |= EXT3_ERROR_FS; 155 es->s_state |= cpu_to_le32(EXT3_ERROR_FS); 156 157 if (sb->s_flags & MS_RDONLY) 158 return; 159 160 if (ext3_error_behaviour(sb) != EXT3_ERRORS_CONTINUE) { 161 EXT3_SB(sb)->s_mount_opt |= EXT3_MOUNT_ABORT; 162 journal_abort(EXT3_SB(sb)->s_journal, -EIO); 163 } 164 165 if (ext3_error_behaviour(sb) == EXT3_ERRORS_PANIC) 166 panic ("EXT3-fs (device %s): panic forced after error\n", 167 bdevname(sb->s_dev)); 168 169 if (ext3_error_behaviour(sb) == EXT3_ERRORS_RO) { 170 printk (KERN_CRIT "Remounting filesystem read-only\n"); 171 sb->s_flags |= MS_RDONLY; 172 } 173 174 ext3_commit_super(sb, es, 1); 175} 176 177void ext3_error (struct super_block * sb, const char * function, 178 const char * fmt, ...) 179{ 180 va_list args; 181 182 va_start (args, fmt); 183 vsprintf (error_buf, fmt, args); 184 va_end (args); 185 186 printk (KERN_CRIT "EXT3-fs error (device %s): %s: %s\n", 187 bdevname(sb->s_dev), function, error_buf); 188 189 ext3_handle_error(sb); 190} 191 192const char *ext3_decode_error(struct super_block * sb, int errno, char nbuf[16]) 193{ 194 char *errstr = NULL; 195 196 switch (errno) { 197 case -EIO: 198 errstr = "IO failure"; 199 break; 200 case -ENOMEM: 201 errstr = "Out of memory"; 202 break; 203 case -EROFS: 204 if (!sb || EXT3_SB(sb)->s_journal->j_flags & JFS_ABORT) 205 errstr = "Journal has aborted"; 206 else 207 errstr = "Readonly filesystem"; 208 break; 209 default: 210 /* If the caller passed in an extra buffer for unknown 211 * errors, textualise them now. Else we just return 212 * NULL. */ 213 if (nbuf) { 214 /* Check for truncated error codes... */ 215 if (snprintf(nbuf, 16, "error %d", -errno) >= 0) 216 errstr = nbuf; 217 } 218 219 break; 220 } 221 222 return errstr; 223} 224 225/* __ext3_std_error decodes expected errors from journaling functions 226 * automatically and invokes the appropriate error response. */ 227 228void __ext3_std_error (struct super_block * sb, const char * function, 229 int errno) 230{ 231 char nbuf[16]; 232 const char *errstr = ext3_decode_error(sb, errno, nbuf); 233 234 printk (KERN_CRIT "EXT3-fs error (device %s) in %s: %s\n", 235 bdevname(sb->s_dev), function, errstr); 236 237 ext3_handle_error(sb); 238} 239 240/* 241 * ext3_abort is a much stronger failure handler than ext3_error. The 242 * abort function may be used to deal with unrecoverable failures such 243 * as journal IO errors or ENOMEM at a critical moment in log management. 244 * 245 * We unconditionally force the filesystem into an ABORT|READONLY state, 246 * unless the error response on the fs has been set to panic in which 247 * case we take the easy way out and panic immediately. 248 */ 249 250void ext3_abort (struct super_block * sb, const char * function, 251 const char * fmt, ...) 252{ 253 va_list args; 254 255 printk (KERN_CRIT "ext3_abort called.\n"); 256 257 va_start (args, fmt); 258 vsprintf (error_buf, fmt, args); 259 va_end (args); 260 261 if (ext3_error_behaviour(sb) == EXT3_ERRORS_PANIC) 262 panic ("EXT3-fs panic (device %s): %s: %s\n", 263 bdevname(sb->s_dev), function, error_buf); 264 265 printk (KERN_CRIT "EXT3-fs abort (device %s): %s: %s\n", 266 bdevname(sb->s_dev), function, error_buf); 267 268 if (sb->s_flags & MS_RDONLY) 269 return; 270 271 printk (KERN_CRIT "Remounting filesystem read-only\n"); 272 sb->u.ext3_sb.s_mount_state |= EXT3_ERROR_FS; 273 sb->s_flags |= MS_RDONLY; 274 sb->u.ext3_sb.s_mount_opt |= EXT3_MOUNT_ABORT; 275 journal_abort(EXT3_SB(sb)->s_journal, -EIO); 276} 277 278/* Deal with the reporting of failure conditions while running, such as 279 * inconsistencies in operation or invalid system states. 280 * 281 * Use ext3_error() for cases of invalid filesystem states, as that will 282 * record an error on disk and force a filesystem check on the next boot. 283 */ 284NORET_TYPE void ext3_panic (struct super_block * sb, const char * function, 285 const char * fmt, ...) 286{ 287 va_list args; 288 289 va_start (args, fmt); 290 vsprintf (error_buf, fmt, args); 291 va_end (args); 292 293 /* this is to prevent panic from syncing this filesystem */ 294 /* AKPM: is this sufficient? */ 295 sb->s_flags |= MS_RDONLY; 296 panic ("EXT3-fs panic (device %s): %s: %s\n", 297 bdevname(sb->s_dev), function, error_buf); 298} 299 300void ext3_warning (struct super_block * sb, const char * function, 301 const char * fmt, ...) 302{ 303 va_list args; 304 305 va_start (args, fmt); 306 vsprintf (error_buf, fmt, args); 307 va_end (args); 308 printk (KERN_WARNING "EXT3-fs warning (device %s): %s: %s\n", 309 bdevname(sb->s_dev), function, error_buf); 310} 311 312void ext3_update_dynamic_rev(struct super_block *sb) 313{ 314 struct ext3_super_block *es = EXT3_SB(sb)->s_es; 315 316 if (le32_to_cpu(es->s_rev_level) > EXT3_GOOD_OLD_REV) 317 return; 318 319 ext3_warning(sb, __FUNCTION__, 320 "updating to rev %d because of new feature flag, " 321 "running e2fsck is recommended", 322 EXT3_DYNAMIC_REV); 323 324 es->s_first_ino = cpu_to_le32(EXT3_GOOD_OLD_FIRST_INO); 325 es->s_inode_size = cpu_to_le16(EXT3_GOOD_OLD_INODE_SIZE); 326 es->s_rev_level = cpu_to_le32(EXT3_DYNAMIC_REV); 327 /* leave es->s_feature_*compat flags alone */ 328 /* es->s_uuid will be set by e2fsck if empty */ 329 330 /* 331 * The rest of the superblock fields should be zero, and if not it 332 * means they are likely already in use, so leave them alone. We 333 * can leave it up to e2fsck to clean up any inconsistencies there. 334 */ 335} 336 337/* 338 * Open the external journal device 339 */ 340static struct block_device *ext3_blkdev_get(kdev_t dev) 341{ 342 struct block_device *bdev; 343 int err = -ENODEV; 344 345 bdev = bdget(kdev_t_to_nr(dev)); 346 if (bdev == NULL) 347 goto fail; 348 err = blkdev_get(bdev, FMODE_READ|FMODE_WRITE, 0, BDEV_FS); 349 if (err < 0) 350 goto fail; 351 return bdev; 352 353fail: 354 printk(KERN_ERR "EXT3: failed to open journal device %s: %d\n", 355 bdevname(dev), err); 356 return NULL; 357} 358 359/* 360 * Release the journal device 361 */ 362static int ext3_blkdev_put(struct block_device *bdev) 363{ 364 return blkdev_put(bdev, BDEV_FS); 365} 366 367static int ext3_blkdev_remove(struct ext3_sb_info *sbi) 368{ 369 struct block_device *bdev; 370 int ret = -ENODEV; 371 372 bdev = sbi->journal_bdev; 373 if (bdev) { 374 ret = ext3_blkdev_put(bdev); 375 sbi->journal_bdev = 0; 376 } 377 return ret; 378} 379 380#define orphan_list_entry(l) list_entry((l), struct inode, u.ext3_i.i_orphan) 381 382static void dump_orphan_list(struct super_block *sb, struct ext3_sb_info *sbi) 383{ 384 struct list_head *l; 385 386 printk(KERN_ERR "sb orphan head is %d\n", 387 le32_to_cpu(sbi->s_es->s_last_orphan)); 388 389 printk(KERN_ERR "sb_info orphan list:\n"); 390 list_for_each(l, &sbi->s_orphan) { 391 struct inode *inode = orphan_list_entry(l); 392 printk(KERN_ERR " " 393 "inode 0x%04x:%ld at %p: mode %o, nlink %d, next %d\n", 394 inode->i_dev, inode->i_ino, inode, 395 inode->i_mode, inode->i_nlink, 396 le32_to_cpu(NEXT_ORPHAN(inode))); 397 } 398} 399 400void ext3_put_super (struct super_block * sb) 401{ 402 struct ext3_sb_info *sbi = EXT3_SB(sb); 403 struct ext3_super_block *es = sbi->s_es; 404 kdev_t j_dev = sbi->s_journal->j_dev; 405 int i; 406 407 journal_destroy(sbi->s_journal); 408 if (!(sb->s_flags & MS_RDONLY)) { 409 EXT3_CLEAR_INCOMPAT_FEATURE(sb, EXT3_FEATURE_INCOMPAT_RECOVER); 410 es->s_state = le16_to_cpu(sbi->s_mount_state); 411 BUFFER_TRACE(sbi->s_sbh, "marking dirty"); 412 mark_buffer_dirty(sbi->s_sbh); 413 ext3_commit_super(sb, es, 1); 414 } 415 416 for (i = 0; i < sbi->s_gdb_count; i++) 417 brelse(sbi->s_group_desc[i]); 418 kfree(sbi->s_group_desc); 419 for (i = 0; i < EXT3_MAX_GROUP_LOADED; i++) 420 brelse(sbi->s_inode_bitmap[i]); 421 for (i = 0; i < EXT3_MAX_GROUP_LOADED; i++) 422 brelse(sbi->s_block_bitmap[i]); 423 brelse(sbi->s_sbh); 424 425 /* Debugging code just in case the in-memory inode orphan list 426 * isn't empty. The on-disk one can be non-empty if we've 427 * detected an error and taken the fs readonly, but the 428 * in-memory list had better be clean by this point. */ 429 if (!list_empty(&sbi->s_orphan)) 430 dump_orphan_list(sb, sbi); 431 J_ASSERT(list_empty(&sbi->s_orphan)); 432 433 invalidate_buffers(sb->s_dev); 434 if (j_dev != sb->s_dev) { 435 /* 436 * Invalidate the journal device's buffers. We don't want them 437 * floating about in memory - the physical journal device may 438 * hotswapped, and it breaks the `ro-after' testing code. 439 */ 440 fsync_no_super(j_dev); 441 invalidate_buffers(j_dev); 442 ext3_blkdev_remove(sbi); 443 } 444 clear_ro_after(sb); 445 446 return; 447} 448 449static struct super_operations ext3_sops = { 450 read_inode: ext3_read_inode, /* BKL held */ 451 write_inode: ext3_write_inode, /* BKL not held. Don't need */ 452 dirty_inode: ext3_dirty_inode, /* BKL not held. We take it */ 453 put_inode: ext3_put_inode, /* BKL not held. Don't need */ 454 delete_inode: ext3_delete_inode, /* BKL not held. We take it */ 455 put_super: ext3_put_super, /* BKL held */ 456 write_super: ext3_write_super, /* BKL held */ 457 write_super_lockfs: ext3_write_super_lockfs, /* BKL not held. Take it */ 458 unlockfs: ext3_unlockfs, /* BKL not held. We take it */ 459 statfs: ext3_statfs, /* BKL held */ 460 remount_fs: ext3_remount, /* BKL held */ 461}; 462 463static int want_value(char *value, char *option) 464{ 465 if (!value || !*value) { 466 printk(KERN_NOTICE "EXT3-fs: the %s option needs an argument\n", 467 option); 468 return -1; 469 } 470 return 0; 471} 472 473static int want_null_value(char *value, char *option) 474{ 475 if (*value) { 476 printk(KERN_NOTICE "EXT3-fs: Invalid %s argument: %s\n", 477 option, value); 478 return -1; 479 } 480 return 0; 481} 482 483static int want_numeric(char *value, char *option, unsigned long *number) 484{ 485 if (want_value(value, option)) 486 return -1; 487 *number = simple_strtoul(value, &value, 0); 488 if (want_null_value(value, option)) 489 return -1; 490 return 0; 491} 492 493/* 494 * This function has been shamelessly adapted from the msdos fs 495 */ 496static int parse_options (char * options, unsigned long * sb_block, 497 struct ext3_sb_info *sbi, 498 unsigned long * inum, 499 int is_remount) 500{ 501 unsigned long *mount_options = &sbi->s_mount_opt; 502 uid_t *resuid = &sbi->s_resuid; 503 gid_t *resgid = &sbi->s_resgid; 504 char * this_char; 505 char * value; 506 507 if (!options) 508 return 1; 509 for (this_char = strtok (options, ","); 510 this_char != NULL; 511 this_char = strtok (NULL, ",")) { 512 if ((value = strchr (this_char, '=')) != NULL) 513 *value++ = 0; 514 if (!strcmp (this_char, "bsddf")) 515 clear_opt (*mount_options, MINIX_DF); 516 else if (!strcmp (this_char, "nouid32")) { 517 set_opt (*mount_options, NO_UID32); 518 } 519 else if (!strcmp (this_char, "abort")) 520 set_opt (*mount_options, ABORT); 521 else if (!strcmp (this_char, "check")) { 522 if (!value || !*value || !strcmp (value, "none")) 523 clear_opt (*mount_options, CHECK); 524 else 525#ifdef CONFIG_EXT3_CHECK 526 set_opt (*mount_options, CHECK); 527#else 528 printk(KERN_ERR 529 "EXT3 Check option not supported\n"); 530#endif 531 } 532 else if (!strcmp (this_char, "debug")) 533 set_opt (*mount_options, DEBUG); 534 else if (!strcmp (this_char, "errors")) { 535 if (want_value(value, "errors")) 536 return 0; 537 if (!strcmp (value, "continue")) { 538 clear_opt (*mount_options, ERRORS_RO); 539 clear_opt (*mount_options, ERRORS_PANIC); 540 set_opt (*mount_options, ERRORS_CONT); 541 } 542 else if (!strcmp (value, "remount-ro")) { 543 clear_opt (*mount_options, ERRORS_CONT); 544 clear_opt (*mount_options, ERRORS_PANIC); 545 set_opt (*mount_options, ERRORS_RO); 546 } 547 else if (!strcmp (value, "panic")) { 548 clear_opt (*mount_options, ERRORS_CONT); 549 clear_opt (*mount_options, ERRORS_RO); 550 set_opt (*mount_options, ERRORS_PANIC); 551 } 552 else { 553 printk (KERN_ERR 554 "EXT3-fs: Invalid errors option: %s\n", 555 value); 556 return 0; 557 } 558 } 559 else if (!strcmp (this_char, "grpid") || 560 !strcmp (this_char, "bsdgroups")) 561 set_opt (*mount_options, GRPID); 562 else if (!strcmp (this_char, "minixdf")) 563 set_opt (*mount_options, MINIX_DF); 564 else if (!strcmp (this_char, "nocheck")) 565 clear_opt (*mount_options, CHECK); 566 else if (!strcmp (this_char, "nogrpid") || 567 !strcmp (this_char, "sysvgroups")) 568 clear_opt (*mount_options, GRPID); 569 else if (!strcmp (this_char, "resgid")) { 570 unsigned long v; 571 if (want_numeric(value, "resgid", &v)) 572 return 0; 573 *resgid = v; 574 } 575 else if (!strcmp (this_char, "resuid")) { 576 unsigned long v; 577 if (want_numeric(value, "resuid", &v)) 578 return 0; 579 *resuid = v; 580 } 581 else if (!strcmp (this_char, "sb")) { 582 if (want_numeric(value, "sb", sb_block)) 583 return 0; 584 } 585#ifdef CONFIG_JBD_DEBUG 586 else if (!strcmp (this_char, "ro-after")) { 587 unsigned long v; 588 if (want_numeric(value, "ro-after", &v)) 589 return 0; 590 ext3_ro_after = v; 591 } 592#endif 593 /* Silently ignore the quota options */ 594 else if (!strcmp (this_char, "grpquota") 595 || !strcmp (this_char, "noquota") 596 || !strcmp (this_char, "quota") 597 || !strcmp (this_char, "usrquota")) 598 /* Don't do anything ;-) */ ; 599 else if (!strcmp (this_char, "journal")) { 600 /* Eventually we will want to be able to create 601 a journal file here. For now, only allow the 602 user to specify an existing inode to be the 603 journal file. */ 604 if (is_remount) { 605 printk(KERN_ERR "EXT3-fs: cannot specify " 606 "journal on remount\n"); 607 return 0; 608 } 609 610 if (want_value(value, "journal")) 611 return 0; 612 if (!strcmp (value, "update")) 613 set_opt (*mount_options, UPDATE_JOURNAL); 614 else if (want_numeric(value, "journal", inum)) 615 return 0; 616 } 617 else if (!strcmp (this_char, "noload")) 618 set_opt (*mount_options, NOLOAD); 619 else if (!strcmp (this_char, "data")) { 620 int data_opt = 0; 621 622 if (want_value(value, "data")) 623 return 0; 624 if (!strcmp (value, "journal")) 625 data_opt = EXT3_MOUNT_JOURNAL_DATA; 626 else if (!strcmp (value, "ordered")) 627 data_opt = EXT3_MOUNT_ORDERED_DATA; 628 else if (!strcmp (value, "writeback")) 629 data_opt = EXT3_MOUNT_WRITEBACK_DATA; 630 else { 631 printk (KERN_ERR 632 "EXT3-fs: Invalid data option: %s\n", 633 value); 634 return 0; 635 } 636 if (is_remount) { 637 if ((*mount_options & EXT3_MOUNT_DATA_FLAGS) != 638 data_opt) { 639 printk(KERN_ERR 640 "EXT3-fs: cannot change data " 641 "mode on remount\n"); 642 return 0; 643 } 644 } else { 645 *mount_options &= ~EXT3_MOUNT_DATA_FLAGS; 646 *mount_options |= data_opt; 647 } 648 } else if (!strcmp (this_char, "commit")) { 649 unsigned long v; 650 if (want_numeric(value, "commit", &v)) 651 return 0; 652 sbi->s_commit_interval = (HZ * v); 653 } else { 654 printk (KERN_ERR 655 "EXT3-fs: Unrecognized mount option %s\n", 656 this_char); 657 return 0; 658 } 659 } 660 return 1; 661} 662 663static int ext3_setup_super(struct super_block *sb, struct ext3_super_block *es, 664 int read_only) 665{ 666 struct ext3_sb_info *sbi = EXT3_SB(sb); 667 int res = 0; 668 669 if (le32_to_cpu(es->s_rev_level) > EXT3_MAX_SUPP_REV) { 670 printk (KERN_ERR "EXT3-fs warning: revision level too high, " 671 "forcing read-only mode\n"); 672 res = MS_RDONLY; 673 } 674 if (read_only) 675 return res; 676 if (!(sbi->s_mount_state & EXT3_VALID_FS)) 677 printk (KERN_WARNING "EXT3-fs warning: mounting unchecked fs, " 678 "running e2fsck is recommended\n"); 679 else if ((sbi->s_mount_state & EXT3_ERROR_FS)) 680 printk (KERN_WARNING 681 "EXT3-fs warning: mounting fs with errors, " 682 "running e2fsck is recommended\n"); 683 else if ((__s16) le16_to_cpu(es->s_max_mnt_count) >= 0 && 684 le16_to_cpu(es->s_mnt_count) >= 685 (unsigned short) (__s16) le16_to_cpu(es->s_max_mnt_count)) 686 printk (KERN_WARNING 687 "EXT3-fs warning: maximal mount count reached, " 688 "running e2fsck is recommended\n"); 689 else if (le32_to_cpu(es->s_checkinterval) && 690 (le32_to_cpu(es->s_lastcheck) + 691 le32_to_cpu(es->s_checkinterval) <= CURRENT_TIME)) 692 printk (KERN_WARNING 693 "EXT3-fs warning: checktime reached, " 694 "running e2fsck is recommended\n"); 695 if (!(__s16) le16_to_cpu(es->s_max_mnt_count)) 696 es->s_max_mnt_count = 697 (__s16) cpu_to_le16(EXT3_DFL_MAX_MNT_COUNT); 698 es->s_mnt_count=cpu_to_le16(le16_to_cpu(es->s_mnt_count) + 1); 699 es->s_mtime = cpu_to_le32(CURRENT_TIME); 700 ext3_update_dynamic_rev(sb); 701 EXT3_SET_INCOMPAT_FEATURE(sb, EXT3_FEATURE_INCOMPAT_RECOVER); 702 ext3_commit_super (sb, es, 1); 703 if (test_opt (sb, DEBUG)) 704 printk (KERN_INFO 705 "[EXT3 FS %s, %s, bs=%lu, gc=%lu, " 706 "bpg=%lu, ipg=%lu, mo=%04lx]\n", 707 EXT3FS_VERSION, EXT3FS_DATE, sb->s_blocksize, 708 sbi->s_groups_count, 709 EXT3_BLOCKS_PER_GROUP(sb), 710 EXT3_INODES_PER_GROUP(sb), 711 sbi->s_mount_opt); 712 printk(KERN_INFO "EXT3 FS " EXT3FS_VERSION ", " EXT3FS_DATE " on %s, ", 713 bdevname(sb->s_dev)); 714 if (EXT3_SB(sb)->s_journal->j_inode == NULL) { 715 printk("external journal on %s\n", 716 bdevname(EXT3_SB(sb)->s_journal->j_dev)); 717 } else { 718 printk("internal journal\n"); 719 } 720#ifdef CONFIG_EXT3_CHECK 721 if (test_opt (sb, CHECK)) { 722 ext3_check_blocks_bitmap (sb); 723 ext3_check_inodes_bitmap (sb); 724 } 725#endif 726 setup_ro_after(sb); 727 return res; 728} 729 730static int ext3_check_descriptors (struct super_block * sb) 731{ 732 struct ext3_sb_info *sbi = EXT3_SB(sb); 733 unsigned long block = le32_to_cpu(sbi->s_es->s_first_data_block); 734 struct ext3_group_desc * gdp = NULL; 735 int desc_block = 0; 736 int i; 737 738 ext3_debug ("Checking group descriptors"); 739 740 for (i = 0; i < sbi->s_groups_count; i++) 741 { 742 if ((i % EXT3_DESC_PER_BLOCK(sb)) == 0) 743 gdp = (struct ext3_group_desc *) 744 sbi->s_group_desc[desc_block++]->b_data; 745 if (le32_to_cpu(gdp->bg_block_bitmap) < block || 746 le32_to_cpu(gdp->bg_block_bitmap) >= 747 block + EXT3_BLOCKS_PER_GROUP(sb)) 748 { 749 ext3_error (sb, "ext3_check_descriptors", 750 "Block bitmap for group %d" 751 " not in group (block %lu)!", 752 i, (unsigned long) 753 le32_to_cpu(gdp->bg_block_bitmap)); 754 return 0; 755 } 756 if (le32_to_cpu(gdp->bg_inode_bitmap) < block || 757 le32_to_cpu(gdp->bg_inode_bitmap) >= 758 block + EXT3_BLOCKS_PER_GROUP(sb)) 759 { 760 ext3_error (sb, "ext3_check_descriptors", 761 "Inode bitmap for group %d" 762 " not in group (block %lu)!", 763 i, (unsigned long) 764 le32_to_cpu(gdp->bg_inode_bitmap)); 765 return 0; 766 } 767 if (le32_to_cpu(gdp->bg_inode_table) < block || 768 le32_to_cpu(gdp->bg_inode_table) + sbi->s_itb_per_group >= 769 block + EXT3_BLOCKS_PER_GROUP(sb)) 770 { 771 ext3_error (sb, "ext3_check_descriptors", 772 "Inode table for group %d" 773 " not in group (block %lu)!", 774 i, (unsigned long) 775 le32_to_cpu(gdp->bg_inode_table)); 776 return 0; 777 } 778 block += EXT3_BLOCKS_PER_GROUP(sb); 779 gdp++; 780 } 781 return 1; 782} 783 784 785/* ext3_orphan_cleanup() walks a singly-linked list of inodes (starting at 786 * the superblock) which were deleted from all directories, but held open by 787 * a process at the time of a crash. We walk the list and try to delete these 788 * inodes at recovery time (only with a read-write filesystem). 789 * 790 * In order to keep the orphan inode chain consistent during traversal (in 791 * case of crash during recovery), we link each inode into the superblock 792 * orphan list_head and handle it the same way as an inode deletion during 793 * normal operation (which journals the operations for us). 794 * 795 * We only do an iget() and an iput() on each inode, which is very safe if we 796 * accidentally point at an in-use or already deleted inode. The worst that 797 * can happen in this case is that we get a "bit already cleared" message from 798 * ext3_free_inode(). The only reason we would point at a wrong inode is if 799 * e2fsck was run on this filesystem, and it must have already done the orphan 800 * inode cleanup for us, so we can safely abort without any further action. 801 */ 802static void ext3_orphan_cleanup (struct super_block * sb, 803 struct ext3_super_block * es) 804{ 805 unsigned int s_flags = sb->s_flags; 806 int nr_orphans = 0, nr_truncates = 0; 807 if (!es->s_last_orphan) { 808 jbd_debug(4, "no orphan inodes to clean up\n"); 809 return; 810 } 811 812 if (s_flags & MS_RDONLY) { 813 printk(KERN_INFO "EXT3-fs: %s: orphan cleanup on readonly fs\n", 814 bdevname(sb->s_dev)); 815 sb->s_flags &= ~MS_RDONLY; 816 } 817 818 if (sb->u.ext3_sb.s_mount_state & EXT3_ERROR_FS) { 819 if (es->s_last_orphan) 820 jbd_debug(1, "Errors on filesystem, " 821 "clearing orphan list.\n"); 822 es->s_last_orphan = 0; 823 jbd_debug(1, "Skipping orphan recovery on fs with errors.\n"); 824 return; 825 } 826 827 while (es->s_last_orphan) { 828 struct inode *inode; 829 830 if (!(inode = 831 ext3_orphan_get(sb, le32_to_cpu(es->s_last_orphan)))) { 832 es->s_last_orphan = 0; 833 break; 834 } 835 836 list_add(&EXT3_I(inode)->i_orphan, &EXT3_SB(sb)->s_orphan); 837 if (inode->i_nlink) { 838 printk(KERN_DEBUG "%s: truncating inode %ld to %Ld " 839 "bytes\n", __FUNCTION__, inode->i_ino, 840 inode->i_size); 841 jbd_debug(2, "truncating inode %ld to %Ld bytes\n", 842 inode->i_ino, inode->i_size); 843 ext3_truncate(inode); 844 nr_truncates++; 845 } else { 846 printk(KERN_DEBUG "%s: deleting unreferenced " 847 "inode %ld\n", __FUNCTION__, inode->i_ino); 848 jbd_debug(2, "deleting unreferenced inode %ld\n", 849 inode->i_ino); 850 nr_orphans++; 851 } 852 iput(inode); /* The delete magic happens here! */ 853 } 854 855#define PLURAL(x) (x), ((x)==1) ? "" : "s" 856 857 if (nr_orphans) 858 printk(KERN_INFO "EXT3-fs: %s: %d orphan inode%s deleted\n", 859 bdevname(sb->s_dev), PLURAL(nr_orphans)); 860 if (nr_truncates) 861 printk(KERN_INFO "EXT3-fs: %s: %d truncate%s cleaned up\n", 862 bdevname(sb->s_dev), PLURAL(nr_truncates)); 863 sb->s_flags = s_flags; /* Restore MS_RDONLY status */ 864} 865 866#define log2(n) ffz(~(n)) 867 868/* 869 * Maximal file size. There is a direct, and {,double-,triple-}indirect 870 * block limit, and also a limit of (2^32 - 1) 512-byte sectors in i_blocks. 871 * We need to be 1 filesystem block less than the 2^32 sector limit. 872 */ 873static loff_t ext3_max_size(int bits) 874{ 875 loff_t res = EXT3_NDIR_BLOCKS; 876 res += 1LL << (bits-2); 877 res += 1LL << (2*(bits-2)); 878 res += 1LL << (3*(bits-2)); 879 res <<= bits; 880 if (res > (512LL << 32) - (1 << bits)) 881 res = (512LL << 32) - (1 << bits); 882 return res; 883} 884 885struct super_block * ext3_read_super (struct super_block * sb, void * data, 886 int silent) 887{ 888 struct buffer_head * bh; 889 struct ext3_super_block *es = 0; 890 struct ext3_sb_info *sbi = EXT3_SB(sb); 891 unsigned long sb_block = 1; 892 unsigned long logic_sb_block = 1; 893 unsigned long offset = 0; 894 unsigned long journal_inum = 0; 895 kdev_t dev = sb->s_dev; 896 int blocksize; 897 int hblock; 898 int db_count; 899 int i; 900 int needs_recovery; 901 902#ifdef CONFIG_JBD_DEBUG 903 ext3_ro_after = 0; 904#endif 905 /* 906 * See what the current blocksize for the device is, and 907 * use that as the blocksize. Otherwise (or if the blocksize 908 * is smaller than the default) use the default. 909 * This is important for devices that have a hardware 910 * sectorsize that is larger than the default. 911 */ 912 blocksize = EXT3_MIN_BLOCK_SIZE; 913 hblock = get_hardsect_size(dev); 914 if (blocksize < hblock) 915 blocksize = hblock; 916 917 sbi->s_mount_opt = 0; 918 sbi->s_resuid = EXT3_DEF_RESUID; 919 sbi->s_resgid = EXT3_DEF_RESGID; 920 if (!parse_options ((char *) data, &sb_block, sbi, &journal_inum, 0)) { 921 sb->s_dev = 0; 922 goto out_fail; 923 } 924 925 sb->s_blocksize = blocksize; 926 set_blocksize (dev, blocksize); 927 928 /* 929 * The ext3 superblock will not be buffer aligned for other than 1kB 930 * block sizes. We need to calculate the offset from buffer start. 931 */ 932 if (blocksize != EXT3_MIN_BLOCK_SIZE) { 933 logic_sb_block = (sb_block * EXT3_MIN_BLOCK_SIZE) / blocksize; 934 offset = (sb_block * EXT3_MIN_BLOCK_SIZE) % blocksize; 935 } 936 937 if (!(bh = sb_bread(sb, logic_sb_block))) { 938 printk (KERN_ERR "EXT3-fs: unable to read superblock\n"); 939 goto out_fail; 940 } 941 /* 942 * Note: s_es must be initialized as soon as possible because 943 * some ext3 macro-instructions depend on its value 944 */ 945 es = (struct ext3_super_block *) (((char *)bh->b_data) + offset); 946 sbi->s_es = es; 947 sb->s_magic = le16_to_cpu(es->s_magic); 948 if (sb->s_magic != EXT3_SUPER_MAGIC) { 949 if (!silent) 950 printk(KERN_ERR 951 "VFS: Can't find ext3 filesystem on dev %s.\n", 952 bdevname(dev)); 953 goto failed_mount; 954 } 955 if (le32_to_cpu(es->s_rev_level) == EXT3_GOOD_OLD_REV && 956 (EXT3_HAS_COMPAT_FEATURE(sb, ~0U) || 957 EXT3_HAS_RO_COMPAT_FEATURE(sb, ~0U) || 958 EXT3_HAS_INCOMPAT_FEATURE(sb, ~0U))) 959 printk(KERN_WARNING 960 "EXT3-fs warning: feature flags set on rev 0 fs, " 961 "running e2fsck is recommended\n"); 962 /* 963 * Check feature flags regardless of the revision level, since we 964 * previously didn't change the revision level when setting the flags, 965 * so there is a chance incompat flags are set on a rev 0 filesystem. 966 */ 967 if ((i = EXT3_HAS_INCOMPAT_FEATURE(sb, ~EXT3_FEATURE_INCOMPAT_SUPP))) { 968 printk(KERN_ERR "EXT3-fs: %s: couldn't mount because of " 969 "unsupported optional features (%x).\n", 970 bdevname(dev), i); 971 goto failed_mount; 972 } 973 if (!(sb->s_flags & MS_RDONLY) && 974 (i = EXT3_HAS_RO_COMPAT_FEATURE(sb, ~EXT3_FEATURE_RO_COMPAT_SUPP))){ 975 printk(KERN_ERR "EXT3-fs: %s: couldn't mount RDWR because of " 976 "unsupported optional features (%x).\n", 977 bdevname(dev), i); 978 goto failed_mount; 979 } 980 sb->s_blocksize_bits = le32_to_cpu(es->s_log_block_size) + 10; 981 sb->s_blocksize = 1 << sb->s_blocksize_bits; 982 983 if (sb->s_blocksize < EXT3_MIN_BLOCK_SIZE || 984 sb->s_blocksize > EXT3_MAX_BLOCK_SIZE) { 985 printk(KERN_ERR 986 "EXT3-fs: Unsupported filesystem blocksize %d on %s.\n", 987 blocksize, bdevname(dev)); 988 goto failed_mount; 989 } 990 991 sb->s_maxbytes = ext3_max_size(sb->s_blocksize_bits); 992 993 if (sb->s_blocksize != blocksize) { 994 blocksize = sb->s_blocksize; 995 996 /* 997 * Make sure the blocksize for the filesystem is larger 998 * than the hardware sectorsize for the machine. 999 */ 1000 if (sb->s_blocksize < hblock) { 1001 printk(KERN_ERR "EXT3-fs: blocksize %d too small for " 1002 "device blocksize %d.\n", blocksize, hblock); 1003 goto failed_mount; 1004 } 1005 1006 brelse (bh); 1007 set_blocksize (dev, sb->s_blocksize); 1008 logic_sb_block = (sb_block * EXT3_MIN_BLOCK_SIZE) / blocksize; 1009 offset = (sb_block * EXT3_MIN_BLOCK_SIZE) % blocksize; 1010 bh = sb_bread(sb, logic_sb_block); 1011 if (!bh) { 1012 printk(KERN_ERR 1013 "EXT3-fs: Can't read superblock on 2nd try.\n"); 1014 return NULL; 1015 } 1016 es = (struct ext3_super_block *)(((char *)bh->b_data) + offset); 1017 sbi->s_es = es; 1018 if (es->s_magic != le16_to_cpu(EXT3_SUPER_MAGIC)) { 1019 printk (KERN_ERR 1020 "EXT3-fs: Magic mismatch, very weird !\n"); 1021 goto failed_mount; 1022 } 1023 } 1024 1025 if (le32_to_cpu(es->s_rev_level) == EXT3_GOOD_OLD_REV) { 1026 sbi->s_inode_size = EXT3_GOOD_OLD_INODE_SIZE; 1027 sbi->s_first_ino = EXT3_GOOD_OLD_FIRST_INO; 1028 } else { 1029 sbi->s_inode_size = le16_to_cpu(es->s_inode_size); 1030 sbi->s_first_ino = le32_to_cpu(es->s_first_ino); 1031 if (sbi->s_inode_size != EXT3_GOOD_OLD_INODE_SIZE) { 1032 printk (KERN_ERR 1033 "EXT3-fs: unsupported inode size: %d\n", 1034 sbi->s_inode_size); 1035 goto failed_mount; 1036 } 1037 } 1038 sbi->s_frag_size = EXT3_MIN_FRAG_SIZE << 1039 le32_to_cpu(es->s_log_frag_size); 1040 if (blocksize != sbi->s_frag_size) { 1041 printk(KERN_ERR 1042 "EXT3-fs: fragsize %lu != blocksize %u (unsupported)\n", 1043 sbi->s_frag_size, blocksize); 1044 goto failed_mount; 1045 } 1046 sbi->s_frags_per_block = 1; 1047 sbi->s_blocks_per_group = le32_to_cpu(es->s_blocks_per_group); 1048 sbi->s_frags_per_group = le32_to_cpu(es->s_frags_per_group); 1049 sbi->s_inodes_per_group = le32_to_cpu(es->s_inodes_per_group); 1050 sbi->s_inodes_per_block = blocksize / EXT3_INODE_SIZE(sb); 1051 sbi->s_itb_per_group = sbi->s_inodes_per_group /sbi->s_inodes_per_block; 1052 sbi->s_desc_per_block = blocksize / sizeof(struct ext3_group_desc); 1053 sbi->s_sbh = bh; 1054 if (sbi->s_resuid == EXT3_DEF_RESUID) 1055 sbi->s_resuid = le16_to_cpu(es->s_def_resuid); 1056 if (sbi->s_resgid == EXT3_DEF_RESGID) 1057 sbi->s_resgid = le16_to_cpu(es->s_def_resgid); 1058 sbi->s_mount_state = le16_to_cpu(es->s_state); 1059 sbi->s_addr_per_block_bits = log2(EXT3_ADDR_PER_BLOCK(sb)); 1060 sbi->s_desc_per_block_bits = log2(EXT3_DESC_PER_BLOCK(sb)); 1061 1062 if (sbi->s_blocks_per_group > blocksize * 8) { 1063 printk (KERN_ERR 1064 "EXT3-fs: #blocks per group too big: %lu\n", 1065 sbi->s_blocks_per_group); 1066 goto failed_mount; 1067 } 1068 if (sbi->s_frags_per_group > blocksize * 8) { 1069 printk (KERN_ERR 1070 "EXT3-fs: #fragments per group too big: %lu\n", 1071 sbi->s_frags_per_group); 1072 goto failed_mount; 1073 } 1074 if (sbi->s_inodes_per_group > blocksize * 8) { 1075 printk (KERN_ERR 1076 "EXT3-fs: #inodes per group too big: %lu\n", 1077 sbi->s_inodes_per_group); 1078 goto failed_mount; 1079 } 1080 1081 sbi->s_groups_count = (le32_to_cpu(es->s_blocks_count) - 1082 le32_to_cpu(es->s_first_data_block) + 1083 EXT3_BLOCKS_PER_GROUP(sb) - 1) / 1084 EXT3_BLOCKS_PER_GROUP(sb); 1085 db_count = (sbi->s_groups_count + EXT3_DESC_PER_BLOCK(sb) - 1) / 1086 EXT3_DESC_PER_BLOCK(sb); 1087 sbi->s_group_desc = kmalloc(db_count * sizeof (struct buffer_head *), 1088 GFP_KERNEL); 1089 if (sbi->s_group_desc == NULL) { 1090 printk (KERN_ERR "EXT3-fs: not enough memory\n"); 1091 goto failed_mount; 1092 } 1093 for (i = 0; i < db_count; i++) { 1094 sbi->s_group_desc[i] = sb_bread(sb, logic_sb_block + i + 1); 1095 if (!sbi->s_group_desc[i]) { 1096 printk (KERN_ERR "EXT3-fs: " 1097 "can't read group descriptor %d\n", i); 1098 db_count = i; 1099 goto failed_mount2; 1100 } 1101 } 1102 if (!ext3_check_descriptors (sb)) { 1103 printk (KERN_ERR "EXT3-fs: group descriptors corrupted !\n"); 1104 goto failed_mount2; 1105 } 1106 for (i = 0; i < EXT3_MAX_GROUP_LOADED; i++) { 1107 sbi->s_inode_bitmap_number[i] = 0; 1108 sbi->s_inode_bitmap[i] = NULL; 1109 sbi->s_block_bitmap_number[i] = 0; 1110 sbi->s_block_bitmap[i] = NULL; 1111 } 1112 sbi->s_loaded_inode_bitmaps = 0; 1113 sbi->s_loaded_block_bitmaps = 0; 1114 sbi->s_gdb_count = db_count; 1115 get_random_bytes(&sbi->s_next_generation, sizeof(u32)); 1116 /* 1117 * set up enough so that it can read an inode 1118 */ 1119 sb->s_op = &ext3_sops; 1120 INIT_LIST_HEAD(&sbi->s_orphan); /* unlinked but open files */ 1121 1122 sb->s_root = 0; 1123 1124 needs_recovery = (es->s_last_orphan != 0 || 1125 EXT3_HAS_INCOMPAT_FEATURE(sb, 1126 EXT3_FEATURE_INCOMPAT_RECOVER)); 1127 1128 /* 1129 * The first inode we look at is the journal inode. Don't try 1130 * root first: it may be modified in the journal! 1131 */ 1132 if (!test_opt(sb, NOLOAD) && 1133 EXT3_HAS_COMPAT_FEATURE(sb, EXT3_FEATURE_COMPAT_HAS_JOURNAL)) { 1134 if (ext3_load_journal(sb, es)) 1135 goto failed_mount2; 1136 } else if (journal_inum) { 1137 if (ext3_create_journal(sb, es, journal_inum)) 1138 goto failed_mount2; 1139 } else { 1140 if (!silent) 1141 printk (KERN_ERR 1142 "ext3: No journal on filesystem on %s\n", 1143 bdevname(dev)); 1144 goto failed_mount2; 1145 } 1146 1147 /* We have now updated the journal if required, so we can 1148 * validate the data journaling mode. */ 1149 switch (test_opt(sb, DATA_FLAGS)) { 1150 case 0: 1151 /* No mode set, assume a default based on the journal 1152 capabilities: ORDERED_DATA if the journal can 1153 cope, else JOURNAL_DATA */ 1154 if (journal_check_available_features 1155 (sbi->s_journal, 0, 0, JFS_FEATURE_INCOMPAT_REVOKE)) 1156 set_opt(sbi->s_mount_opt, ORDERED_DATA); 1157 else 1158 set_opt(sbi->s_mount_opt, JOURNAL_DATA); 1159 break; 1160 1161 case EXT3_MOUNT_ORDERED_DATA: 1162 case EXT3_MOUNT_WRITEBACK_DATA: 1163 if (!journal_check_available_features 1164 (sbi->s_journal, 0, 0, JFS_FEATURE_INCOMPAT_REVOKE)) { 1165 printk(KERN_ERR "EXT3-fs: Journal does not support " 1166 "requested data journaling mode\n"); 1167 goto failed_mount3; 1168 } 1169 default: 1170 break; 1171 } 1172 1173 /* 1174 * The journal_load will have done any necessary log recovery, 1175 * so we can safely mount the rest of the filesystem now. 1176 */ 1177 1178 sb->s_root = d_alloc_root(iget(sb, EXT3_ROOT_INO)); 1179 if (!sb->s_root || !S_ISDIR(sb->s_root->d_inode->i_mode) || 1180 !sb->s_root->d_inode->i_blocks || !sb->s_root->d_inode->i_size) { 1181 if (sb->s_root) { 1182 dput(sb->s_root); 1183 sb->s_root = NULL; 1184 printk(KERN_ERR 1185 "EXT3-fs: corrupt root inode, run e2fsck\n"); 1186 } else 1187 printk(KERN_ERR "EXT3-fs: get root inode failed\n"); 1188 goto failed_mount3; 1189 } 1190 1191 ext3_setup_super (sb, es, sb->s_flags & MS_RDONLY); 1192 /* 1193 * akpm: core read_super() calls in here with the superblock locked. 1194 * That deadlocks, because orphan cleanup needs to lock the superblock 1195 * in numerous places. Here we just pop the lock - it's relatively 1196 * harmless, because we are now ready to accept write_super() requests, 1197 * and aviro says that's the only reason for hanging onto the 1198 * superblock lock. 1199 */ 1200 EXT3_SB(sb)->s_mount_state |= EXT3_ORPHAN_FS; 1201 unlock_super(sb); /* akpm: sigh */ 1202 ext3_orphan_cleanup(sb, es); 1203 lock_super(sb); 1204 EXT3_SB(sb)->s_mount_state &= ~EXT3_ORPHAN_FS; 1205 if (needs_recovery) 1206 printk (KERN_INFO "EXT3-fs: recovery complete.\n"); 1207 ext3_mark_recovery_complete(sb, es); 1208 printk (KERN_INFO "EXT3-fs: mounted filesystem with %s data mode.\n", 1209 test_opt(sb,DATA_FLAGS) == EXT3_MOUNT_JOURNAL_DATA ? "journal": 1210 test_opt(sb,DATA_FLAGS) == EXT3_MOUNT_ORDERED_DATA ? "ordered": 1211 "writeback"); 1212 1213 return sb; 1214 1215failed_mount3: 1216 journal_destroy(sbi->s_journal); 1217failed_mount2: 1218 for (i = 0; i < db_count; i++) 1219 brelse(sbi->s_group_desc[i]); 1220 kfree(sbi->s_group_desc); 1221failed_mount: 1222 ext3_blkdev_remove(sbi); 1223 brelse(bh); 1224out_fail: 1225 return NULL; 1226} 1227 1228/* 1229 * Setup any per-fs journal parameters now. We'll do this both on 1230 * initial mount, once the journal has been initialised but before we've 1231 * done any recovery; and again on any subsequent remount. 1232 */ 1233static void ext3_init_journal_params(struct ext3_sb_info *sbi, 1234 journal_t *journal) 1235{ 1236 if (sbi->s_commit_interval) 1237 journal->j_commit_interval = sbi->s_commit_interval; 1238 /* We could also set up an ext3-specific default for the commit 1239 * interval here, but for now we'll just fall back to the jbd 1240 * default. */ 1241} 1242 1243 1244static journal_t *ext3_get_journal(struct super_block *sb, int journal_inum) 1245{ 1246 struct inode *journal_inode; 1247 journal_t *journal; 1248 1249 /* First, test for the existence of a valid inode on disk. Bad 1250 * things happen if we iget() an unused inode, as the subsequent 1251 * iput() will try to delete it. */ 1252 1253 journal_inode = iget(sb, journal_inum); 1254 if (!journal_inode) { 1255 printk(KERN_ERR "EXT3-fs: no journal found.\n"); 1256 return NULL; 1257 } 1258 if (!journal_inode->i_nlink) { 1259 make_bad_inode(journal_inode); 1260 iput(journal_inode); 1261 printk(KERN_ERR "EXT3-fs: journal inode is deleted.\n"); 1262 return NULL; 1263 } 1264 1265 jbd_debug(2, "Journal inode found at %p: %Ld bytes\n", 1266 journal_inode, journal_inode->i_size); 1267 if (is_bad_inode(journal_inode) || !S_ISREG(journal_inode->i_mode)) { 1268 printk(KERN_ERR "EXT3-fs: invalid journal inode.\n"); 1269 iput(journal_inode); 1270 return NULL; 1271 } 1272 1273 journal = journal_init_inode(journal_inode); 1274 if (!journal) { 1275 printk(KERN_ERR "EXT3-fs: Could not load journal inode\n"); 1276 iput(journal_inode); 1277 } 1278 ext3_init_journal_params(EXT3_SB(sb), journal); 1279 return journal; 1280} 1281 1282static journal_t *ext3_get_dev_journal(struct super_block *sb, 1283 int dev) 1284{ 1285 struct buffer_head * bh; 1286 journal_t *journal; 1287 int start; 1288 int len; 1289 int hblock, blocksize; 1290 unsigned long sb_block; 1291 unsigned long offset; 1292 kdev_t journal_dev = to_kdev_t(dev); 1293 struct ext3_super_block * es; 1294 struct block_device *bdev; 1295 1296 bdev = ext3_blkdev_get(journal_dev); 1297 if (bdev == NULL) 1298 return NULL; 1299 1300 blocksize = sb->s_blocksize; 1301 hblock = get_hardsect_size(journal_dev); 1302 if (blocksize < hblock) { 1303 printk(KERN_ERR 1304 "EXT3-fs: blocksize too small for journal device.\n"); 1305 goto out_bdev; 1306 } 1307 1308 sb_block = EXT3_MIN_BLOCK_SIZE / blocksize; 1309 offset = EXT3_MIN_BLOCK_SIZE % blocksize; 1310 set_blocksize(dev, blocksize); 1311 if (!(bh = bread(dev, sb_block, blocksize))) { 1312 printk(KERN_ERR "EXT3-fs: couldn't read superblock of " 1313 "external journal\n"); 1314 goto out_bdev; 1315 } 1316 1317 es = (struct ext3_super_block *) (((char *)bh->b_data) + offset); 1318 if ((le16_to_cpu(es->s_magic) != EXT3_SUPER_MAGIC) || 1319 !(le32_to_cpu(es->s_feature_incompat) & 1320 EXT3_FEATURE_INCOMPAT_JOURNAL_DEV)) { 1321 printk(KERN_ERR "EXT3-fs: external journal has " 1322 "bad superblock\n"); 1323 brelse(bh); 1324 goto out_bdev; 1325 } 1326 1327 if (memcmp(EXT3_SB(sb)->s_es->s_journal_uuid, es->s_uuid, 16)) { 1328 printk(KERN_ERR "EXT3-fs: journal UUID does not match\n"); 1329 brelse(bh); 1330 goto out_bdev; 1331 } 1332 1333 len = le32_to_cpu(es->s_blocks_count); 1334 start = sb_block + 1; 1335 brelse(bh); /* we're done with the superblock */ 1336 1337 journal = journal_init_dev(journal_dev, sb->s_dev, 1338 start, len, blocksize); 1339 if (!journal) { 1340 printk(KERN_ERR "EXT3-fs: failed to create device journal\n"); 1341 goto out_bdev; 1342 } 1343 ll_rw_block(READ, 1, &journal->j_sb_buffer); 1344 wait_on_buffer(journal->j_sb_buffer); 1345 if (!buffer_uptodate(journal->j_sb_buffer)) { 1346 printk(KERN_ERR "EXT3-fs: I/O error on journal device\n"); 1347 goto out_journal; 1348 } 1349 if (ntohl(journal->j_superblock->s_nr_users) != 1) { 1350 printk(KERN_ERR "EXT3-fs: External journal has more than one " 1351 "user (unsupported) - %d\n", 1352 ntohl(journal->j_superblock->s_nr_users)); 1353 goto out_journal; 1354 } 1355 EXT3_SB(sb)->journal_bdev = bdev; 1356 ext3_init_journal_params(EXT3_SB(sb), journal); 1357 return journal; 1358out_journal: 1359 journal_destroy(journal); 1360out_bdev: 1361 ext3_blkdev_put(bdev); 1362 return NULL; 1363} 1364 1365static int ext3_load_journal(struct super_block * sb, 1366 struct ext3_super_block * es) 1367{ 1368 journal_t *journal; 1369 int journal_inum = le32_to_cpu(es->s_journal_inum); 1370 int journal_dev = le32_to_cpu(es->s_journal_dev); 1371 int err = 0; 1372 int really_read_only; 1373 1374 really_read_only = is_read_only(sb->s_dev); 1375 1376 /* 1377 * Are we loading a blank journal or performing recovery after a 1378 * crash? For recovery, we need to check in advance whether we 1379 * can get read-write access to the device. 1380 */ 1381 1382 if (EXT3_HAS_INCOMPAT_FEATURE(sb, EXT3_FEATURE_INCOMPAT_RECOVER)) { 1383 if (sb->s_flags & MS_RDONLY) { 1384 printk(KERN_INFO "EXT3-fs: INFO: recovery " 1385 "required on readonly filesystem.\n"); 1386 if (really_read_only) { 1387 printk(KERN_ERR "EXT3-fs: write access " 1388 "unavailable, cannot proceed.\n"); 1389 return -EROFS; 1390 } 1391 printk (KERN_INFO "EXT3-fs: write access will " 1392 "be enabled during recovery.\n"); 1393 } 1394 } 1395 1396 if (journal_inum && journal_dev) { 1397 printk(KERN_ERR "EXT3-fs: filesystem has both journal " 1398 "and inode journals!\n"); 1399 return -EINVAL; 1400 } 1401 1402 if (journal_inum) { 1403 if (!(journal = ext3_get_journal(sb, journal_inum))) 1404 return -EINVAL; 1405 } else { 1406 if (!(journal = ext3_get_dev_journal(sb, journal_dev))) 1407 return -EINVAL; 1408 } 1409 1410 1411 if (!really_read_only && test_opt(sb, UPDATE_JOURNAL)) { 1412 err = journal_update_format(journal); 1413 if (err) { 1414 printk(KERN_ERR "EXT3-fs: error updating journal.\n"); 1415 journal_destroy(journal); 1416 return err; 1417 } 1418 } 1419 1420 if (!EXT3_HAS_INCOMPAT_FEATURE(sb, EXT3_FEATURE_INCOMPAT_RECOVER)) 1421 err = journal_wipe(journal, !really_read_only); 1422 if (!err) 1423 err = journal_load(journal); 1424 1425 if (err) { 1426 printk(KERN_ERR "EXT3-fs: error loading journal.\n"); 1427 journal_destroy(journal); 1428 return err; 1429 } 1430 1431 EXT3_SB(sb)->s_journal = journal; 1432 ext3_clear_journal_err(sb, es); 1433 return 0; 1434} 1435 1436static int ext3_create_journal(struct super_block * sb, 1437 struct ext3_super_block * es, 1438 int journal_inum) 1439{ 1440 journal_t *journal; 1441 1442 if (sb->s_flags & MS_RDONLY) { 1443 printk(KERN_ERR "EXT3-fs: readonly filesystem when trying to " 1444 "create journal.\n"); 1445 return -EROFS; 1446 } 1447 1448 if (!(journal = ext3_get_journal(sb, journal_inum))) 1449 return -EINVAL; 1450 1451 printk(KERN_INFO "EXT3-fs: creating new journal on inode %d\n", 1452 journal_inum); 1453 1454 if (journal_create(journal)) { 1455 printk(KERN_ERR "EXT3-fs: error creating journal.\n"); 1456 journal_destroy(journal); 1457 return -EIO; 1458 } 1459 1460 EXT3_SB(sb)->s_journal = journal; 1461 1462 ext3_update_dynamic_rev(sb); 1463 EXT3_SET_INCOMPAT_FEATURE(sb, EXT3_FEATURE_INCOMPAT_RECOVER); 1464 EXT3_SET_COMPAT_FEATURE(sb, EXT3_FEATURE_COMPAT_HAS_JOURNAL); 1465 1466 es->s_journal_inum = cpu_to_le32(journal_inum); 1467 sb->s_dirt = 1; 1468 1469 /* Make sure we flush the recovery flag to disk. */ 1470 ext3_commit_super(sb, es, 1); 1471 1472 return 0; 1473} 1474 1475static void ext3_commit_super (struct super_block * sb, 1476 struct ext3_super_block * es, 1477 int sync) 1478{ 1479 es->s_wtime = cpu_to_le32(CURRENT_TIME); 1480 BUFFER_TRACE(sb->u.ext3_sb.s_sbh, "marking dirty"); 1481 mark_buffer_dirty(sb->u.ext3_sb.s_sbh); 1482 if (sync) { 1483 ll_rw_block(WRITE, 1, &sb->u.ext3_sb.s_sbh); 1484 wait_on_buffer(sb->u.ext3_sb.s_sbh); 1485 } 1486} 1487 1488 1489/* 1490 * Have we just finished recovery? If so, and if we are mounting (or 1491 * remounting) the filesystem readonly, then we will end up with a 1492 * consistent fs on disk. Record that fact. 1493 */ 1494static void ext3_mark_recovery_complete(struct super_block * sb, 1495 struct ext3_super_block * es) 1496{ 1497 journal_flush(EXT3_SB(sb)->s_journal); 1498 if (EXT3_HAS_INCOMPAT_FEATURE(sb, EXT3_FEATURE_INCOMPAT_RECOVER) && 1499 sb->s_flags & MS_RDONLY) { 1500 EXT3_CLEAR_INCOMPAT_FEATURE(sb, EXT3_FEATURE_INCOMPAT_RECOVER); 1501 sb->s_dirt = 0; 1502 ext3_commit_super(sb, es, 1); 1503 } 1504} 1505 1506/* 1507 * If we are mounting (or read-write remounting) a filesystem whose journal 1508 * has recorded an error from a previous lifetime, move that error to the 1509 * main filesystem now. 1510 */ 1511static void ext3_clear_journal_err(struct super_block * sb, 1512 struct ext3_super_block * es) 1513{ 1514 journal_t *journal; 1515 int j_errno; 1516 const char *errstr; 1517 1518 journal = EXT3_SB(sb)->s_journal; 1519 1520 /* 1521 * Now check for any error status which may have been recorded in the 1522 * journal by a prior ext3_error() or ext3_abort() 1523 */ 1524 1525 j_errno = journal_errno(journal); 1526 if (j_errno) { 1527 char nbuf[16]; 1528 1529 errstr = ext3_decode_error(sb, j_errno, nbuf); 1530 ext3_warning(sb, __FUNCTION__, "Filesystem error recorded " 1531 "from previous mount: %s", errstr); 1532 ext3_warning(sb, __FUNCTION__, "Marking fs in need of " 1533 "filesystem check."); 1534 1535 sb->u.ext3_sb.s_mount_state |= EXT3_ERROR_FS; 1536 es->s_state |= cpu_to_le16(EXT3_ERROR_FS); 1537 ext3_commit_super (sb, es, 1); 1538 1539 journal_clear_err(journal); 1540 } 1541} 1542 1543/* 1544 * Force the running and committing transactions to commit, 1545 * and wait on the commit. 1546 */ 1547int ext3_force_commit(struct super_block *sb) 1548{ 1549 journal_t *journal; 1550 int ret; 1551 1552 if (sb->s_flags & MS_RDONLY) 1553 return 0; 1554 1555 journal = EXT3_SB(sb)->s_journal; 1556 sb->s_dirt = 0; 1557 lock_kernel(); /* important: lock down j_running_transaction */ 1558 ret = ext3_journal_force_commit(journal); 1559 unlock_kernel(); 1560 return ret; 1561} 1562 1563/* 1564 * Ext3 always journals updates to the superblock itself, so we don't 1565 * have to propagate any other updates to the superblock on disk at this 1566 * point. Just start an async writeback to get the buffers on their way 1567 * to the disk. 1568 * 1569 * This implicitly triggers the writebehind on sync(). 1570 */ 1571 1572static int do_sync_supers = 0; 1573MODULE_PARM(do_sync_supers, "i"); 1574MODULE_PARM_DESC(do_sync_supers, "Write superblocks synchronously"); 1575 1576void ext3_write_super (struct super_block * sb) 1577{ 1578 tid_t target; 1579 1580 if (down_trylock(&sb->s_lock) == 0) 1581 BUG(); /* aviro detector */ 1582 sb->s_dirt = 0; 1583 target = log_start_commit(EXT3_SB(sb)->s_journal, NULL); 1584 1585 if (do_sync_supers) { 1586 unlock_super(sb); 1587 log_wait_commit(EXT3_SB(sb)->s_journal, target); 1588 lock_super(sb); 1589 } 1590} 1591 1592/* 1593 * LVM calls this function before a (read-only) snapshot is created. This 1594 * gives us a chance to flush the journal completely and mark the fs clean. 1595 */ 1596void ext3_write_super_lockfs(struct super_block *sb) 1597{ 1598 sb->s_dirt = 0; 1599 1600 lock_kernel(); /* 2.4.5 forgot to do this for us */ 1601 if (!(sb->s_flags & MS_RDONLY)) { 1602 journal_t *journal = EXT3_SB(sb)->s_journal; 1603 1604 /* Now we set up the journal barrier. */ 1605 unlock_super(sb); 1606 journal_lock_updates(journal); 1607 journal_flush(journal); 1608 lock_super(sb); 1609 1610 /* Journal blocked and flushed, clear needs_recovery flag. */ 1611 EXT3_CLEAR_INCOMPAT_FEATURE(sb, EXT3_FEATURE_INCOMPAT_RECOVER); 1612 ext3_commit_super(sb, EXT3_SB(sb)->s_es, 1); 1613 } 1614 unlock_kernel(); 1615} 1616 1617/* 1618 * Called by LVM after the snapshot is done. We need to reset the RECOVER 1619 * flag here, even though the filesystem is not technically dirty yet. 1620 */ 1621void ext3_unlockfs(struct super_block *sb) 1622{ 1623 if (!(sb->s_flags & MS_RDONLY)) { 1624 lock_kernel(); 1625 lock_super(sb); 1626 /* Reser the needs_recovery flag before the fs is unlocked. */ 1627 EXT3_SET_INCOMPAT_FEATURE(sb, EXT3_FEATURE_INCOMPAT_RECOVER); 1628 ext3_commit_super(sb, EXT3_SB(sb)->s_es, 1); 1629 unlock_super(sb); 1630 journal_unlock_updates(EXT3_SB(sb)->s_journal); 1631 unlock_kernel(); 1632 } 1633} 1634 1635int ext3_remount (struct super_block * sb, int * flags, char * data) 1636{ 1637 struct ext3_super_block * es; 1638 struct ext3_sb_info *sbi = EXT3_SB(sb); 1639 unsigned long tmp; 1640 1641 clear_ro_after(sb); 1642 1643 /* 1644 * Allow the "check" option to be passed as a remount option. 1645 */ 1646 if (!parse_options(data, &tmp, sbi, &tmp, 1)) 1647 return -EINVAL; 1648 1649 if (sbi->s_mount_opt & EXT3_MOUNT_ABORT) 1650 ext3_abort(sb, __FUNCTION__, "Abort forced by user"); 1651 1652 es = sbi->s_es; 1653 1654 ext3_init_journal_params(sbi, sbi->s_journal); 1655 1656 if ((*flags & MS_RDONLY) != (sb->s_flags & MS_RDONLY)) { 1657 if (sbi->s_mount_opt & EXT3_MOUNT_ABORT) 1658 return -EROFS; 1659 1660 if (*flags & MS_RDONLY) { 1661 /* 1662 * First of all, the unconditional stuff we have to do 1663 * to disable replay of the journal when we next remount 1664 */ 1665 sb->s_flags |= MS_RDONLY; 1666 1667 /* 1668 * OK, test if we are remounting a valid rw partition 1669 * readonly, and if so set the rdonly flag and then 1670 * mark the partition as valid again. 1671 */ 1672 if (!(es->s_state & cpu_to_le16(EXT3_VALID_FS)) && 1673 (sbi->s_mount_state & EXT3_VALID_FS)) 1674 es->s_state = cpu_to_le16(sbi->s_mount_state); 1675 1676 ext3_mark_recovery_complete(sb, es); 1677 } else { 1678 int ret; 1679 if ((ret = EXT3_HAS_RO_COMPAT_FEATURE(sb, 1680 ~EXT3_FEATURE_RO_COMPAT_SUPP))) { 1681 printk(KERN_WARNING "EXT3-fs: %s: couldn't " 1682 "remount RDWR because of unsupported " 1683 "optional features (%x).\n", 1684 bdevname(sb->s_dev), ret); 1685 return -EROFS; 1686 } 1687 /* 1688 * Mounting a RDONLY partition read-write, so reread 1689 * and store the current valid flag. (It may have 1690 * been changed by e2fsck since we originally mounted 1691 * the partition.) 1692 */ 1693 ext3_clear_journal_err(sb, es); 1694 sbi->s_mount_state = le16_to_cpu(es->s_state); 1695 if (!ext3_setup_super (sb, es, 0)) 1696 sb->s_flags &= ~MS_RDONLY; 1697 } 1698 } 1699 setup_ro_after(sb); 1700 return 0; 1701} 1702 1703int ext3_statfs (struct super_block * sb, struct statfs * buf) 1704{ 1705 struct ext3_super_block *es = EXT3_SB(sb)->s_es; 1706 unsigned long overhead; 1707 int i; 1708 1709 if (test_opt (sb, MINIX_DF)) 1710 overhead = 0; 1711 else { 1712 /* 1713 * Compute the overhead (FS structures) 1714 */ 1715 1716 /* 1717 * All of the blocks before first_data_block are 1718 * overhead 1719 */ 1720 overhead = le32_to_cpu(es->s_first_data_block); 1721 1722 /* 1723 * Add the overhead attributed to the superblock and 1724 * block group descriptors. If the sparse superblocks 1725 * feature is turned on, then not all groups have this. 1726 */ 1727 for (i = 0; i < EXT3_SB(sb)->s_groups_count; i++) 1728 overhead += ext3_bg_has_super(sb, i) + 1729 ext3_bg_num_gdb(sb, i); 1730 1731 /* 1732 * Every block group has an inode bitmap, a block 1733 * bitmap, and an inode table. 1734 */ 1735 overhead += (EXT3_SB(sb)->s_groups_count * 1736 (2 + EXT3_SB(sb)->s_itb_per_group)); 1737 } 1738 1739 buf->f_type = EXT3_SUPER_MAGIC; 1740 buf->f_bsize = sb->s_blocksize; 1741 buf->f_blocks = le32_to_cpu(es->s_blocks_count) - overhead; 1742 buf->f_bfree = ext3_count_free_blocks (sb); 1743 buf->f_bavail = buf->f_bfree - le32_to_cpu(es->s_r_blocks_count); 1744 if (buf->f_bfree < le32_to_cpu(es->s_r_blocks_count)) 1745 buf->f_bavail = 0; 1746 buf->f_files = le32_to_cpu(es->s_inodes_count); 1747 buf->f_ffree = ext3_count_free_inodes (sb); 1748 buf->f_namelen = EXT3_NAME_LEN; 1749 return 0; 1750} 1751 1752static DECLARE_FSTYPE_DEV(ext3_fs_type, "ext3", ext3_read_super); 1753 1754static int __init init_ext3_fs(void) 1755{ 1756 return register_filesystem(&ext3_fs_type); 1757} 1758 1759static void __exit exit_ext3_fs(void) 1760{ 1761 unregister_filesystem(&ext3_fs_type); 1762} 1763 1764EXPORT_NO_SYMBOLS; 1765 1766MODULE_AUTHOR("Remy Card, Stephen Tweedie, Andrew Morton, Andreas Dilger, Theodore Ts'o and others"); 1767MODULE_DESCRIPTION("Second Extended Filesystem with journaling extensions"); 1768MODULE_LICENSE("GPL"); 1769module_init(init_ext3_fs) 1770module_exit(exit_ext3_fs) 1771