1/* 2 * linux/fs/recovery.c 3 * 4 * Written by Stephen C. Tweedie <sct@redhat.com>, 1999 5 * 6 * Copyright 1999-2000 Red Hat Software --- All Rights Reserved 7 * 8 * This file is part of the Linux kernel and is made available under 9 * the terms of the GNU General Public License, version 2, or at your 10 * option, any later version, incorporated herein by reference. 11 * 12 * Journal recovery routines for the generic filesystem journaling code; 13 * part of the ext2fs journaling system. 14 */ 15 16#include <linux/time.h> 17#include <linux/fs.h> 18#include <linux/errno.h> 19#include <linux/slab.h> 20#include "hfsplus_jbd.h" 21 22/* 23 * Maintain information about the progress of the recovery job, so that 24 * the different passes can carry information between them. 25 */ 26struct recovery_info 27{ 28 hfsplus_jbd_tid_t start_transaction; 29 hfsplus_jbd_tid_t end_transaction; 30 31 int nr_replays; 32 int nr_revokes; 33 int nr_revoke_hits; 34}; 35 36enum passtype {PASS_SCAN, PASS_REVOKE, PASS_REPLAY}; 37static int do_one_pass(hfsplus_jbd_t *journal, 38 struct recovery_info *info, enum passtype pass); 39static int scan_revoke_records(hfsplus_jbd_t *, struct buffer_head *, 40 hfsplus_jbd_tid_t, struct recovery_info *); 41 42#ifdef __KERNEL__ 43 44/* Release readahead buffers after use */ 45void hfsplus_jbd_brelse_array(struct buffer_head *b[], int n) 46{ 47 while (--n >= 0) 48 brelse (b[n]); 49} 50 51 52/* 53 * When reading from the journal, we are going through the block device 54 * layer directly and so there is no readahead being done for us. We 55 * need to implement any readahead ourselves if we want it to happen at 56 * all. Recovery is basically one long sequential read, so make sure we 57 * do the IO in reasonably large chunks. 58 * 59 * This is not so critical that we need to be enormously clever about 60 * the readahead size, though. 128K is a purely arbitrary, good-enough 61 * fixed value. 62 */ 63 64#define MAXBUF 8 65static int do_readahead(hfsplus_jbd_t *journal, unsigned int start) 66{ 67 int err; 68 unsigned int max, nbufs, next; 69 unsigned long blocknr; 70 struct buffer_head *bh; 71 72 struct buffer_head * bufs[MAXBUF]; 73 74 /* Do up to 128K of readahead */ 75 max = start + (128 * 1024 / journal->j_blocksize); 76 if (max > journal->j_maxlen) 77 max = journal->j_maxlen; 78 79 /* Do the readahead itself. We'll submit MAXBUF buffer_heads at 80 * a time to the block device IO layer. */ 81 82 nbufs = 0; 83 84 for (next = start; next < max; next++) { 85 err = hfsplus_jbd_bmap(journal, next, &blocknr); 86 87 if (err) { 88 printk (KERN_ERR "JBD: bad block at offset %u\n", 89 next); 90 goto failed; 91 } 92 93 bh = __getblk(journal->j_dev, blocknr, journal->j_blocksize); 94 if (!bh) { 95 err = -ENOMEM; 96 goto failed; 97 } 98 99 if (!buffer_uptodate(bh) && !buffer_locked(bh)) { 100 bufs[nbufs++] = bh; 101 if (nbufs == MAXBUF) { 102 ll_rw_block(READ, nbufs, bufs); 103 hfsplus_jbd_brelse_array(bufs, nbufs); 104 nbufs = 0; 105 } 106 } else 107 brelse(bh); 108 } 109 110 if (nbufs) 111 ll_rw_block(READ, nbufs, bufs); 112 err = 0; 113 114failed: 115 if (nbufs) 116 hfsplus_jbd_brelse_array(bufs, nbufs); 117 return err; 118} 119 120#endif /* __KERNEL__ */ 121 122 123/* 124 * Read a block from the journal 125 */ 126 127static int jread(struct buffer_head **bhp, hfsplus_jbd_t *journal, 128 unsigned int offset) 129{ 130 int err; 131 unsigned long blocknr; 132 struct buffer_head *bh; 133 134 *bhp = NULL; 135 136 if (offset >= journal->j_maxlen) { 137 printk(KERN_ERR "JBD: corrupted journal superblock\n"); 138 return -EIO; 139 } 140 141 err = hfsplus_jbd_bmap(journal, offset, &blocknr); 142 143 if (err) { 144 printk (KERN_ERR "JBD: bad block at offset %u\n", 145 offset); 146 return err; 147 } 148 149 bh = __getblk(journal->j_dev, blocknr, journal->j_blocksize); 150 if (!bh) 151 return -ENOMEM; 152 153 if (!buffer_uptodate(bh)) { 154 /* If this is a brand new buffer, start readahead. 155 Otherwise, we assume we are already reading it. */ 156 if (!buffer_req(bh)) 157 do_readahead(journal, offset); 158 wait_on_buffer(bh); 159 } 160 161 if (!buffer_uptodate(bh)) { 162 printk (KERN_ERR "JBD: Failed to read block at offset %u\n", 163 offset); 164 brelse(bh); 165 return -EIO; 166 } 167 168 *bhp = bh; 169 return 0; 170} 171 172 173/* 174 * Count the number of in-use tags in a journal descriptor block. 175 */ 176 177static int count_tags(struct buffer_head *bh, int size) 178{ 179 char * tagp; 180 hfsplus_jbd_block_tag_t * tag; 181 int nr = 0; 182 183 tagp = &bh->b_data[sizeof(hfsplus_jbd_header_t)]; 184 185 while ((tagp - bh->b_data + sizeof(hfsplus_jbd_block_tag_t)) <= size) { 186 tag = (hfsplus_jbd_block_tag_t *) tagp; 187 188 nr++; 189 tagp += sizeof(hfsplus_jbd_block_tag_t); 190 if (!(tag->t_flags & cpu_to_be32(JFS_FLAG_SAME_UUID))) 191 tagp += 16; 192 193 if (tag->t_flags & cpu_to_be32(JFS_FLAG_LAST_TAG)) 194 break; 195 } 196 197 return nr; 198} 199 200 201/* Make sure we wrap around the log correctly! */ 202#define wrap(journal, var) \ 203do { \ 204 if (var >= (journal)->j_last) \ 205 var -= ((journal)->j_last - (journal)->j_first); \ 206} while (0) 207 208/** 209 * hfsplus_jbd_recover - recovers a on-disk journal 210 * @journal: the journal to recover 211 * 212 * The primary function for recovering the log contents when mounting a 213 * journaled device. 214 * 215 * Recovery is done in three passes. In the first pass, we look for the 216 * end of the log. In the second, we assemble the list of revoke 217 * blocks. In the third and final pass, we replay any un-revoked blocks 218 * in the log. 219 */ 220int hfsplus_jbd_recover(hfsplus_jbd_t *journal) 221{ 222 int err; 223 hfsplus_jbd_superblock_t * sb; 224 225 struct recovery_info info; 226 227 memset(&info, 0, sizeof(info)); 228 sb = journal->j_superblock; 229 230 /* 231 * The journal superblock's s_start field (the current log head) 232 * is always zero if, and only if, the journal was cleanly 233 * unmounted. 234 */ 235 236 if (!sb->s_start) { 237 hfsplus_jbd_debug(1, "No recovery required, last transaction %d\n", 238 be32_to_cpu(sb->s_sequence)); 239 journal->j_transaction_sequence = be32_to_cpu(sb->s_sequence) + 1; 240 return 0; 241 } 242 243 err = do_one_pass(journal, &info, PASS_SCAN); 244 if (!err) 245 err = do_one_pass(journal, &info, PASS_REVOKE); 246 if (!err) 247 err = do_one_pass(journal, &info, PASS_REPLAY); 248 249 hfsplus_jbd_debug(0, "JBD: recovery, exit status %d, " 250 "recovered transactions %u to %u\n", 251 err, info.start_transaction, info.end_transaction); 252 hfsplus_jbd_debug(0, "JBD: Replayed %d and revoked %d/%d blocks\n", 253 info.nr_replays, info.nr_revoke_hits, info.nr_revokes); 254 255 /* Restart the log at the next transaction ID, thus invalidating 256 * any existing commit records in the log. */ 257 journal->j_transaction_sequence = ++info.end_transaction; 258 259 hfsplus_jbd_clear_revoke(journal); 260 sync_blockdev(journal->j_fs_dev); 261 return err; 262} 263 264/** 265 * hfsplus_jbd_skip_recovery - Start journal and wipe exiting records 266 * @journal: journal to startup 267 * 268 * Locate any valid recovery information from the journal and set up the 269 * journal structures in memory to ignore it (presumably because the 270 * caller has evidence that it is out of date). 271 * This function does'nt appear to be exorted.. 272 * 273 * We perform one pass over the journal to allow us to tell the user how 274 * much recovery information is being erased, and to let us initialise 275 * the journal transaction sequence numbers to the next unused ID. 276 */ 277int hfsplus_jbd_skip_recovery(hfsplus_jbd_t *journal) 278{ 279 int err; 280 hfsplus_jbd_superblock_t * sb; 281 282 struct recovery_info info; 283 284 memset (&info, 0, sizeof(info)); 285 sb = journal->j_superblock; 286 287 err = do_one_pass(journal, &info, PASS_SCAN); 288 289 if (err) { 290 printk(KERN_ERR "JBD: error %d scanning journal\n", err); 291 ++journal->j_transaction_sequence; 292 } else { 293 int dropped = info.end_transaction - be32_to_cpu(sb->s_sequence); 294 hfsplus_jbd_debug(0, 295 "JBD: ignoring %d transaction%s from the journal.\n", 296 dropped, (dropped == 1) ? "" : "s"); 297 journal->j_transaction_sequence = ++info.end_transaction; 298 } 299 300 journal->j_tail = 0; 301 return err; 302} 303 304static int do_one_pass(hfsplus_jbd_t *journal, 305 struct recovery_info *info, enum passtype pass) 306{ 307 unsigned int first_commit_ID, next_commit_ID; 308 unsigned long next_log_block; 309 int err, success = 0; 310 hfsplus_jbd_superblock_t * sb; 311 hfsplus_jbd_header_t * tmp; 312 struct buffer_head * bh; 313 unsigned int sequence; 314 int blocktype; 315 316 /* Precompute the maximum metadata descriptors in a descriptor block */ 317 int MAX_BLOCKS_PER_DESC; 318 MAX_BLOCKS_PER_DESC = ((journal->j_blocksize-sizeof(hfsplus_jbd_header_t)) 319 / sizeof(hfsplus_jbd_block_tag_t)); 320 321 /* 322 * First thing is to establish what we expect to find in the log 323 * (in terms of transaction IDs), and where (in terms of log 324 * block offsets): query the superblock. 325 */ 326 327 sb = journal->j_superblock; 328 next_commit_ID = be32_to_cpu(sb->s_sequence); 329 next_log_block = be32_to_cpu(sb->s_start); 330 331 first_commit_ID = next_commit_ID; 332 if (pass == PASS_SCAN) 333 info->start_transaction = first_commit_ID; 334 335 hfsplus_jbd_debug(1, "Starting recovery pass %d\n", pass); 336 337 /* 338 * Now we walk through the log, transaction by transaction, 339 * making sure that each transaction has a commit block in the 340 * expected place. Each complete transaction gets replayed back 341 * into the main filesystem. 342 */ 343 344 while (1) { 345 int flags; 346 char * tagp; 347 hfsplus_jbd_block_tag_t * tag; 348 struct buffer_head * obh; 349 struct buffer_head * nbh; 350 351 cond_resched(); /* We're under lock_kernel() */ 352 353 /* If we already know where to stop the log traversal, 354 * check right now that we haven't gone past the end of 355 * the log. */ 356 357 if (pass != PASS_SCAN) 358 if (hfsplus_tid_geq(next_commit_ID, info->end_transaction)) 359 break; 360 361 hfsplus_jbd_debug(2, "Scanning for sequence ID %u at %lu/%lu\n", 362 next_commit_ID, next_log_block, journal->j_last); 363 364 /* Skip over each chunk of the transaction looking 365 * either the next descriptor block or the final commit 366 * record. */ 367 368 hfsplus_jbd_debug(3, "JBD: checking block %ld\n", next_log_block); 369 err = jread(&bh, journal, next_log_block); 370 if (err) 371 goto failed; 372 373 next_log_block++; 374 wrap(journal, next_log_block); 375 376 /* What kind of buffer is it? 377 * 378 * If it is a descriptor block, check that it has the 379 * expected sequence number. Otherwise, we're all done 380 * here. */ 381 382 tmp = (hfsplus_jbd_header_t *)bh->b_data; 383 384 if (tmp->h_magic != cpu_to_be32(JFS_MAGIC_NUMBER)) { 385printk("@@@@@@@ Oops! .. Need to check it function: %s, Line: %d\n", __FUNCTION__, __LINE__); 386 brelse(bh); 387 break; 388 } 389 390 blocktype = be32_to_cpu(tmp->h_blocktype); 391 sequence = be32_to_cpu(tmp->h_sequence); 392 hfsplus_jbd_debug(3, "Found magic %d, sequence %d\n", 393 blocktype, sequence); 394 395 if (sequence != next_commit_ID) { 396 brelse(bh); 397 break; 398 } 399 400 /* OK, we have a valid descriptor block which matches 401 * all of the sequence number checks. What are we going 402 * to do with it? That depends on the pass... */ 403 404 switch(blocktype) { 405 case JFS_DESCRIPTOR_BLOCK: 406 /* If it is a valid descriptor block, replay it 407 * in pass REPLAY; otherwise, just skip over the 408 * blocks it describes. */ 409 if (pass != PASS_REPLAY) { 410 next_log_block += 411 count_tags(bh, journal->j_blocksize); 412 wrap(journal, next_log_block); 413 brelse(bh); 414 continue; 415 } 416 417 /* A descriptor block: we can now write all of 418 * the data blocks. Yay, useful work is finally 419 * getting done here! */ 420 421 tagp = &bh->b_data[sizeof(hfsplus_jbd_header_t)]; 422 while ((tagp - bh->b_data +sizeof(hfsplus_jbd_block_tag_t)) 423 <= journal->j_blocksize) { 424 unsigned long io_block; 425 426 tag = (hfsplus_jbd_block_tag_t *) tagp; 427 flags = be32_to_cpu(tag->t_flags); 428 429 io_block = next_log_block++; 430 wrap(journal, next_log_block); 431 err = jread(&obh, journal, io_block); 432 if (err) { 433 /* Recover what we can, but 434 * report failure at the end. */ 435 success = err; 436 printk (KERN_ERR 437 "JBD: IO error %d recovering " 438 "block %ld in log\n", 439 err, io_block); 440 } else { 441 unsigned long blocknr; 442 443 HFSPLUS_J_ASSERT(obh != NULL); 444 blocknr = be32_to_cpu(tag->t_blocknr); 445 446 /* If the block has been 447 * revoked, then we're all done 448 * here. */ 449 if (hfsplus_jbd_test_revoke 450 (journal, blocknr, 451 next_commit_ID)) { 452 brelse(obh); 453 ++info->nr_revoke_hits; 454 goto skip_write; 455 } 456 457 /* Find a buffer for the new 458 * data being restored */ 459 nbh = __getblk(journal->j_fs_dev, 460 blocknr, 461 journal->j_blocksize); 462 if (nbh == NULL) { 463 printk(KERN_ERR 464 "JBD: Out of memory " 465 "during recovery.\n"); 466 err = -ENOMEM; 467 brelse(bh); 468 brelse(obh); 469 goto failed; 470 } 471 472 lock_buffer(nbh); 473 memcpy(nbh->b_data, obh->b_data, 474 journal->j_blocksize); 475 if (flags & JFS_FLAG_ESCAPE) { 476printk("@@@@@@@ Oops! .. Need to check it function: %s, Line: %d\n", __FUNCTION__, __LINE__); 477 *((__be32 *)bh->b_data) = 478 cpu_to_be32(JFS_MAGIC_NUMBER); 479 } 480 481 HFSPLUS_BUFFER_TRACE(nbh, "marking dirty"); 482 set_buffer_uptodate(nbh); 483 mark_buffer_dirty(nbh); 484 HFSPLUS_BUFFER_TRACE(nbh, "marking uptodate"); 485 ++info->nr_replays; 486 /* ll_rw_block(WRITE, 1, &nbh); */ 487 unlock_buffer(nbh); 488 brelse(obh); 489 brelse(nbh); 490 } 491 492 skip_write: 493 tagp += sizeof(hfsplus_jbd_block_tag_t); 494 if (!(flags & JFS_FLAG_SAME_UUID)) 495 tagp += 16; 496 497 if (flags & JFS_FLAG_LAST_TAG) 498 break; 499 } 500 501 brelse(bh); 502 continue; 503 504 case JFS_COMMIT_BLOCK: 505 /* Found an expected commit block: not much to 506 * do other than move on to the next sequence 507 * number. */ 508 brelse(bh); 509 next_commit_ID++; 510 continue; 511 512 case JFS_REVOKE_BLOCK: 513 /* If we aren't in the REVOKE pass, then we can 514 * just skip over this block. */ 515 if (pass != PASS_REVOKE) { 516 brelse(bh); 517 continue; 518 } 519 520 err = scan_revoke_records(journal, bh, 521 next_commit_ID, info); 522 brelse(bh); 523 if (err) 524 goto failed; 525 continue; 526 527 default: 528 hfsplus_jbd_debug(3, "Unrecognised magic %d, end of scan.\n", 529 blocktype); 530 goto done; 531 } 532 } 533 534 done: 535 /* 536 * We broke out of the log scan loop: either we came to the 537 * known end of the log or we found an unexpected block in the 538 * log. If the latter happened, then we know that the "current" 539 * transaction marks the end of the valid log. 540 */ 541 542 if (pass == PASS_SCAN) 543 info->end_transaction = next_commit_ID; 544 else { 545 /* It's really bad news if different passes end up at 546 * different places (but possible due to IO errors). */ 547 if (info->end_transaction != next_commit_ID) { 548 printk (KERN_ERR "JBD: recovery pass %d ended at " 549 "transaction %u, expected %u\n", 550 pass, next_commit_ID, info->end_transaction); 551 if (!success) 552 success = -EIO; 553 } 554 } 555 556 return success; 557 558 failed: 559 return err; 560} 561 562 563/* Scan a revoke record, marking all blocks mentioned as revoked. */ 564 565static int scan_revoke_records(hfsplus_jbd_t *journal, struct buffer_head *bh, 566 hfsplus_jbd_tid_t sequence, struct recovery_info *info) 567{ 568 hfsplus_jbd_revoke_header_t *header; 569 int offset, max; 570 571 header = (hfsplus_jbd_revoke_header_t *) bh->b_data; 572 offset = sizeof(hfsplus_jbd_revoke_header_t); 573 max = be32_to_cpu(header->r_count); 574 575 while (offset < max) { 576 unsigned long blocknr; 577 int err; 578 579 blocknr = be32_to_cpu(* ((__be32 *) (bh->b_data+offset))); 580 offset += 4; 581 err = hfsplus_jbd_set_revoke(journal, blocknr, sequence); 582 if (err) 583 return err; 584 ++info->nr_revokes; 585 } 586 return 0; 587} 588