1/* 2 * linux/fs/checkpoint.c 3 * 4 * Written by Stephen C. Tweedie <sct@redhat.com>, 1999 5 * 6 * Copyright 1999 Red Hat Software --- All Rights Reserved 7 * 8 * This file is part of the Linux kernel and is made available under 9 * the terms of the GNU General Public License, version 2, or at your 10 * option, any later version, incorporated herein by reference. 11 * 12 * Checkpoint routines for the generic filesystem journaling code. 13 * Part of the ext2fs journaling system. 14 * 15 * Checkpointing is the process of ensuring that a section of the log is 16 * committed fully to disk, so that that portion of the log can be 17 * reused. 18 */ 19 20#include <linux/time.h> 21#include <linux/fs.h> 22#include "hfsplus_jbd.h" 23#include <linux/errno.h> 24#include <linux/slab.h> 25#include "hfsplus_fs.h" 26 27/* 28 * Unlink a buffer from a transaction. 29 * 30 * Called with j_list_lock held. 31 */ 32 33static inline void __buffer_unlink(struct hfsplus_jbd_head *jh) 34{ 35 hfsplus_transaction_t *transaction; 36 37 transaction = jh->b_cp_transaction; 38 jh->b_cp_transaction = NULL; 39 40 jh->b_cpnext->b_cpprev = jh->b_cpprev; 41 jh->b_cpprev->b_cpnext = jh->b_cpnext; 42 if (transaction->t_checkpoint_list == jh) 43 transaction->t_checkpoint_list = jh->b_cpnext; 44 if (transaction->t_checkpoint_list == jh) 45 transaction->t_checkpoint_list = NULL; 46} 47 48/* 49 * Try to release a checkpointed buffer from its transaction. 50 * Returns 1 if we released it. 51 * Requires j_list_lock 52 * Called under hfsplus_jbd_lock_bh_state(hfsplus_jh2bh(jh)), and drops it 53 */ 54static int __try_to_free_cp_buf(struct hfsplus_jbd_head *jh) 55{ 56 int ret = 0; 57 struct buffer_head *bh = hfsplus_jh2bh(jh); 58 59 if (jh->b_jlist == HFSPLUS_BJ_None && !buffer_locked(bh) && !buffer_dirty(bh)) { 60 HFSPLUS_JBUFFER_TRACE(jh, "remove from checkpoint list"); 61 __hfsplus_jbd_remove_checkpoint(jh); 62 hfsplus_jbd_unlock_bh_state(bh); 63 hfsplus_jbd_remove_journal_head(bh); 64 HFSPLUS_BUFFER_TRACE(bh, "release"); 65 __brelse(bh); 66 ret = 1; 67 } else { 68 hfsplus_jbd_unlock_bh_state(bh); 69 } 70 return ret; 71} 72 73/* 74 * __hfsplus__log_wait_for_space: wait until there is space in the journal. 75 * 76 * Called under j-state_lock *only*. It will be unlocked if we have to wait 77 * for a checkpoint to free up some space in the log. 78 */ 79void __hfsplus__log_wait_for_space(hfsplus_jbd_t *journal) 80{ 81 int nblocks; 82 assert_spin_locked(&journal->j_state_lock); 83 84 nblocks = hfsplus_jbd_space_needed(journal); 85 while (__hfsplus__log_space_left(journal) < nblocks) { 86 if (journal->j_flags & JFS_ABORT) 87 return; 88 spin_unlock(&journal->j_state_lock); 89 down(&journal->j_checkpoint_sem); 90 91 /* 92 * Test again, another process may have checkpointed while we 93 * were waiting for the checkpoint lock 94 */ 95 spin_lock(&journal->j_state_lock); 96 nblocks = hfsplus_jbd_space_needed(journal); 97 if (__hfsplus__log_space_left(journal) < nblocks) { 98 spin_unlock(&journal->j_state_lock); 99 hfsplus_jbd_log_do_checkpoint(journal); 100 spin_lock(&journal->j_state_lock); 101 } 102 up(&journal->j_checkpoint_sem); 103 } 104} 105 106/* 107 * We were unable to perform hfsplus_jbd_trylock_bh_state() inside j_list_lock. 108 * The caller must restart a list walk. Wait for someone else to run 109 * hfsplus_jbd_unlock_bh_state(). 110 */ 111static void jbd_sync_bh(hfsplus_jbd_t *journal, struct buffer_head *bh) 112{ 113 get_bh(bh); 114 spin_unlock(&journal->j_list_lock); 115 hfsplus_jbd_lock_bh_state(bh); 116 hfsplus_jbd_unlock_bh_state(bh); 117 put_bh(bh); 118} 119 120/* 121 * Clean up a transaction's checkpoint list. 122 * 123 * We wait for any pending IO to complete and make sure any clean 124 * buffers are removed from the transaction. 125 * 126 * Return 1 if we performed any actions which might have destroyed the 127 * checkpoint. (hfsplus_jbd_remove_checkpoint() deletes the transaction when 128 * the last checkpoint buffer is cleansed) 129 * 130 * Called with j_list_lock held. 131 */ 132static int __cleanup_transaction(hfsplus_jbd_t *journal, hfsplus_transaction_t *transaction) 133{ 134 struct hfsplus_jbd_head *jh, *next_jh, *last_jh; 135 struct buffer_head *bh; 136 int ret = 0; 137 138 assert_spin_locked(&journal->j_list_lock); 139 jh = transaction->t_checkpoint_list; 140 if (!jh) 141 return 0; 142 143 last_jh = jh->b_cpprev; 144 next_jh = jh; 145 do { 146 jh = next_jh; 147 bh = hfsplus_jh2bh(jh); 148 if (buffer_locked(bh)) { 149 atomic_inc(&bh->b_count); 150 spin_unlock(&journal->j_list_lock); 151 wait_on_buffer(bh); 152 /* the hfsplus_jbd_head may have gone by now */ 153 HFSPLUS_BUFFER_TRACE(bh, "brelse"); 154 __brelse(bh); 155 goto out_return_1; 156 } 157 158 /* 159 * This is foul 160 */ 161 if (!hfsplus_jbd_trylock_bh_state(bh)) { 162 jbd_sync_bh(journal, bh); 163 goto out_return_1; 164 } 165 166 if (jh->b_transaction != NULL) { 167 hfsplus_transaction_t *t = jh->b_transaction; 168 hfsplus_jbd_tid_t tid = t->t_tid; 169 170 spin_unlock(&journal->j_list_lock); 171 hfsplus_jbd_unlock_bh_state(bh); 172 hfsplus_log_start_commit(journal, tid); 173 hfsplus_jbd_log_wait_commit(journal, tid); 174 goto out_return_1; 175 } 176 177 /* 178 * AKPM: I think the buffer_hfsplus_jbddirty test is redundant - it 179 * shouldn't have NULL b_transaction? 180 */ 181 next_jh = jh->b_cpnext; 182 if (!buffer_dirty(bh) && !buffer_hfsplus_jbddirty(bh)) { 183 HFSPLUS_BUFFER_TRACE(bh, "remove from checkpoint"); 184 __hfsplus_jbd_remove_checkpoint(jh); 185 hfsplus_jbd_unlock_bh_state(bh); 186 hfsplus_jbd_remove_journal_head(bh); 187 __brelse(bh); 188 ret = 1; 189 } else { 190 hfsplus_jbd_unlock_bh_state(bh); 191 } 192 } while (jh != last_jh); 193 194 return ret; 195out_return_1: 196 spin_lock(&journal->j_list_lock); 197 return 1; 198} 199 200#define NR_BATCH 64 201 202static void 203__flush_batch(hfsplus_jbd_t *journal, struct buffer_head **bhs, int *batch_count) 204{ 205 int i; 206 207 spin_unlock(&journal->j_list_lock); 208 ll_rw_block(SWRITE, *batch_count, bhs); 209 spin_lock(&journal->j_list_lock); 210 for (i = 0; i < *batch_count; i++) { 211 struct buffer_head *bh = bhs[i]; 212 clear_buffer_hfsplus_jbd_jwrite(bh); 213 HFSPLUS_BUFFER_TRACE(bh, "brelse"); 214 __brelse(bh); 215 } 216 *batch_count = 0; 217} 218 219/* 220 * Try to flush one buffer from the checkpoint list to disk. 221 * 222 * Return 1 if something happened which requires us to abort the current 223 * scan of the checkpoint list. 224 * 225 * Called with j_list_lock held. 226 * Called under hfsplus_jbd_lock_bh_state(hfsplus_jh2bh(jh)), and drops it 227 */ 228static int __flush_buffer(hfsplus_jbd_t *journal, struct hfsplus_jbd_head *jh, 229 struct buffer_head **bhs, int *batch_count, 230 int *drop_count) 231{ 232 struct buffer_head *bh = hfsplus_jh2bh(jh); 233 int ret = 0; 234 235 if (buffer_dirty(bh) && !buffer_locked(bh) && jh->b_jlist == HFSPLUS_BJ_None) { 236 HFSPLUS_J_ASSERT_JH(jh, jh->b_transaction == NULL); 237 238 /* 239 * Important: we are about to write the buffer, and 240 * possibly block, while still holding the journal lock. 241 * We cannot afford to let the transaction logic start 242 * messing around with this buffer before we write it to 243 * disk, as that would break recoverability. 244 */ 245 HFSPLUS_BUFFER_TRACE(bh, "queue"); 246 get_bh(bh); 247 HFSPLUS_J_ASSERT_BH(bh, !buffer_hfsplus_jbd_jwrite(bh)); 248 set_buffer_hfsplus_jbd_jwrite(bh); 249 bhs[*batch_count] = bh; 250 hfsplus_jbd_unlock_bh_state(bh); 251 (*batch_count)++; 252 if (*batch_count == NR_BATCH) { 253 __flush_batch(journal, bhs, batch_count); 254 ret = 1; 255 } 256 } else { 257 int last_buffer = 0; 258 if (jh->b_cpnext == jh) { 259 /* We may be about to drop the transaction. Tell the 260 * caller that the lists have changed. 261 */ 262 last_buffer = 1; 263 } 264 if (__try_to_free_cp_buf(jh)) { 265 (*drop_count)++; 266 ret = last_buffer; 267 } 268 } 269 return ret; 270} 271 272/* 273 * Perform an actual checkpoint. We don't write out only enough to 274 * satisfy the current blocked requests: rather we submit a reasonably 275 * sized chunk of the outstanding data to disk at once for 276 * efficiency. __hfsplus__log_wait_for_space() will retry if we didn't free enough. 277 * 278 * However, we _do_ take into account the amount requested so that once 279 * the IO has been queued, we can return as soon as enough of it has 280 * completed to disk. 281 * 282 * The journal should be locked before calling this function. 283 */ 284int hfsplus_jbd_log_do_checkpoint(hfsplus_jbd_t *journal) 285{ 286 int result; 287 int batch_count = 0; 288 struct buffer_head *bhs[NR_BATCH]; 289 290 dprint(DBG_JCHKPT, "Start checkpoint\n"); 291 292 /* 293 * First thing: if there are any transactions in the log which 294 * don't need checkpointing, just eliminate them from the 295 * journal straight away. 296 */ 297 result = cleanup_hfsplus_jbd_tail(journal); 298 dprint(DBG_JCHKPT, "cleanup_hfsplus_jbd_tail returned %d\n", result); 299 if (result <= 0) 300 return result; 301 302 /* 303 * OK, we need to start writing disk blocks. Try to free up a 304 * quarter of the log in a single checkpoint if we can. 305 */ 306 /* 307 * AKPM: check this code. I had a feeling a while back that it 308 * degenerates into a busy loop at unmount time. 309 */ 310 spin_lock(&journal->j_list_lock); 311 while (journal->j_checkpoint_transactions) { 312 hfsplus_transaction_t *transaction; 313 struct hfsplus_jbd_head *jh, *last_jh, *next_jh; 314 int drop_count = 0; 315 int cleanup_ret, retry = 0; 316 hfsplus_jbd_tid_t this_tid; 317 318 transaction = journal->j_checkpoint_transactions; 319 this_tid = transaction->t_tid; 320 jh = transaction->t_checkpoint_list; 321 last_jh = jh->b_cpprev; 322 next_jh = jh; 323 do { 324 struct buffer_head *bh; 325 326 jh = next_jh; 327 next_jh = jh->b_cpnext; 328 bh = hfsplus_jh2bh(jh); 329 if (!hfsplus_jbd_trylock_bh_state(bh)) { 330 jbd_sync_bh(journal, bh); 331 spin_lock(&journal->j_list_lock); 332 retry = 1; 333 break; 334 } 335 retry = __flush_buffer(journal, jh, bhs, &batch_count, &drop_count); 336 if (cond_resched_lock(&journal->j_list_lock)) { 337 retry = 1; 338 break; 339 } 340 } while (jh != last_jh && !retry); 341 342 if (batch_count) { 343 __flush_batch(journal, bhs, &batch_count); 344 retry = 1; 345 } 346 347 /* 348 * If someone cleaned up this transaction while we slept, we're 349 * done 350 */ 351 if (journal->j_checkpoint_transactions != transaction) 352 break; 353 if (retry) 354 continue; 355 /* 356 * Maybe it's a new transaction, but it fell at the same 357 * address 358 */ 359 if (transaction->t_tid != this_tid) 360 continue; 361 /* 362 * We have walked the whole transaction list without 363 * finding anything to write to disk. We had better be 364 * able to make some progress or we are in trouble. 365 */ 366 cleanup_ret = __cleanup_transaction(journal, transaction); 367 HFSPLUS_J_ASSERT(drop_count != 0 || cleanup_ret != 0); 368 if (journal->j_checkpoint_transactions != transaction) 369 break; 370 } 371 spin_unlock(&journal->j_list_lock); 372 result = cleanup_hfsplus_jbd_tail(journal); 373 if (result < 0) 374 return result; 375 376 return 0; 377} 378 379/* 380 * Check the list of checkpoint transactions for the journal to see if 381 * we have already got rid of any since the last update of the log tail 382 * in the journal superblock. If so, we can instantly roll the 383 * superblock forward to remove those transactions from the log. 384 * 385 * Return <0 on error, 0 on success, 1 if there was nothing to clean up. 386 * 387 * Called with the journal lock held. 388 * 389 * This is the only part of the journaling code which really needs to be 390 * aware of transaction aborts. Checkpointing involves writing to the 391 * main filesystem area rather than to the journal, so it can proceed 392 * even in abort state, but we must not update the journal superblock if 393 * we have an abort error outstanding. 394 */ 395 396int cleanup_hfsplus_jbd_tail(hfsplus_jbd_t *journal) 397{ 398 hfsplus_transaction_t * transaction; 399 hfsplus_jbd_tid_t first_tid; 400 unsigned long blocknr, freed; 401 402 /* OK, work out the oldest transaction remaining in the log, and 403 * the log block it starts at. 404 * 405 * If the log is now empty, we need to work out which is the 406 * next transaction ID we will write, and where it will 407 * start. */ 408 409 spin_lock(&journal->j_state_lock); 410 spin_lock(&journal->j_list_lock); 411 transaction = journal->j_checkpoint_transactions; 412 if (transaction) { 413 first_tid = transaction->t_tid; 414 blocknr = transaction->t_log_start; 415 } else if ((transaction = journal->j_committing_transaction) != NULL) { 416 first_tid = transaction->t_tid; 417 blocknr = transaction->t_log_start; 418 } else if ((transaction = journal->j_running_transaction) != NULL) { 419 first_tid = transaction->t_tid; 420 blocknr = journal->j_head; 421 } else { 422 first_tid = journal->j_transaction_sequence; 423 blocknr = journal->j_head; 424 } 425 spin_unlock(&journal->j_list_lock); 426 HFSPLUS_J_ASSERT(blocknr != 0); 427 428 /* If the oldest pinned transaction is at the tail of the log 429 already then there's not much we can do right now. */ 430 if (journal->j_tail_sequence == first_tid) { 431 spin_unlock(&journal->j_state_lock); 432 return 1; 433 } 434 435 /* OK, update the superblock to recover the freed space. 436 * Physical blocks come first: have we wrapped beyond the end of 437 * the log? */ 438 freed = blocknr - journal->j_tail; 439 if (blocknr < journal->j_tail) 440 freed = freed + journal->j_last - journal->j_first; 441 442#ifdef HFSPLUS_JOURNAL_MAC_COMPATIBLE 443 hfsplus_journal_header_start_update(journal, freed); 444#endif 445 446 dprint(DBG_JCHKPT, 447 "Cleaning journal tail from %d to %d (offset %lu), " 448 "freeing %lu\n", 449 journal->j_tail_sequence, first_tid, blocknr, freed); 450 451 journal->j_free += freed; 452 journal->j_tail_sequence = first_tid; 453 journal->j_tail = blocknr; 454 spin_unlock(&journal->j_state_lock); 455 if (!(journal->j_flags & JFS_ABORT)) 456 hfsplus_jbd_update_superblock(journal, 1); 457 return 0; 458} 459 460 461/* Checkpoint list management */ 462 463/* 464 * hfsplus_jbd_clean_checkpoint_list 465 * 466 * Find all the written-back checkpoint buffers in the journal and release them. 467 * 468 * Called with the journal locked. 469 * Called with j_list_lock held. 470 * Returns number of bufers reaped (for debug) 471 */ 472 473int __hfsplus_jbd_clean_checkpoint_list(hfsplus_jbd_t *journal) 474{ 475 hfsplus_transaction_t *transaction, *last_transaction, *next_transaction; 476 int ret = 0; 477 478 transaction = journal->j_checkpoint_transactions; 479 if (transaction == 0) 480 goto out; 481 482 last_transaction = transaction->t_cpprev; 483 next_transaction = transaction; 484 do { 485 struct hfsplus_jbd_head *jh; 486 487 transaction = next_transaction; 488 next_transaction = transaction->t_cpnext; 489 jh = transaction->t_checkpoint_list; 490 if (jh) { 491 struct hfsplus_jbd_head *last_jh = jh->b_cpprev; 492 struct hfsplus_jbd_head *next_jh = jh; 493 494 do { 495 jh = next_jh; 496 next_jh = jh->b_cpnext; 497 /* Use trylock because of the ranknig */ 498 if (hfsplus_jbd_trylock_bh_state(hfsplus_jh2bh(jh))) 499 ret += __try_to_free_cp_buf(jh); 500 /* 501 * This function only frees up some memory 502 * if possible so we dont have an obligation 503 * to finish processing. Bail out if preemption 504 * requested: 505 */ 506 if (need_resched()) 507 goto out; 508 } while (jh != last_jh); 509 } 510 } while (transaction != last_transaction); 511out: 512 return ret; 513} 514 515/* 516 * hfsplus_jbd_remove_checkpoint: called after a buffer has been committed 517 * to disk (either by being write-back flushed to disk, or being 518 * committed to the log). 519 * 520 * We cannot safely clean a transaction out of the log until all of the 521 * buffer updates committed in that transaction have safely been stored 522 * elsewhere on disk. To achieve this, all of the buffers in a 523 * transaction need to be maintained on the transaction's checkpoint 524 * list until they have been rewritten, at which point this function is 525 * called to remove the buffer from the existing transaction's 526 * checkpoint list. 527 * 528 * This function is called with the journal locked. 529 * This function is called with j_list_lock held. 530 */ 531 532void __hfsplus_jbd_remove_checkpoint(struct hfsplus_jbd_head *jh) 533{ 534 hfsplus_transaction_t *transaction; 535 hfsplus_jbd_t *journal; 536 537 HFSPLUS_JBUFFER_TRACE(jh, "entry"); 538 539 if ((transaction = jh->b_cp_transaction) == NULL) { 540 HFSPLUS_JBUFFER_TRACE(jh, "not on transaction"); 541 goto out; 542 } 543 journal = transaction->t_journal; 544 545 __buffer_unlink(jh); 546 547 if (transaction->t_checkpoint_list != NULL) 548 goto out; 549 HFSPLUS_JBUFFER_TRACE(jh, "transaction has no more buffers"); 550 551 /* 552 * There is one special case to worry about: if we have just pulled the 553 * buffer off a committing transaction's forget list, then even if the 554 * checkpoint list is empty, the transaction obviously cannot be 555 * dropped! 556 * 557 * The locking here around j_committing_transaction is a bit sleazy. 558 * See the comment at the end of hfsplus_jbd_commit_transaction(). 559 */ 560 if (transaction == journal->j_committing_transaction) { 561 HFSPLUS_JBUFFER_TRACE(jh, "belongs to committing transaction"); 562 goto out; 563 } 564 565 /* OK, that was the last buffer for the transaction: we can now 566 safely remove this transaction from the log */ 567 568 __hfsplus_jbd_drop_transaction(journal, transaction); 569 570 /* Just in case anybody was waiting for more transactions to be 571 checkpointed... */ 572 wake_up(&journal->j_wait_logspace); 573out: 574 HFSPLUS_JBUFFER_TRACE(jh, "exit"); 575} 576 577/* 578 * hfsplus_jbd_insert_checkpoint: put a committed buffer onto a checkpoint 579 * list so that we know when it is safe to clean the transaction out of 580 * the log. 581 * 582 * Called with the journal locked. 583 * Called with j_list_lock held. 584 */ 585void __hfsplus_jbd_insert_checkpoint(struct hfsplus_jbd_head *jh, 586 hfsplus_transaction_t *transaction) 587{ 588 HFSPLUS_JBUFFER_TRACE(jh, "entry"); 589 HFSPLUS_J_ASSERT_JH(jh, buffer_dirty(hfsplus_jh2bh(jh)) || buffer_hfsplus_jbddirty(hfsplus_jh2bh(jh))); 590 HFSPLUS_J_ASSERT_JH(jh, jh->b_cp_transaction == NULL); 591 592 jh->b_cp_transaction = transaction; 593 594 if (!transaction->t_checkpoint_list) { 595 jh->b_cpnext = jh->b_cpprev = jh; 596 } else { 597 jh->b_cpnext = transaction->t_checkpoint_list; 598 jh->b_cpprev = transaction->t_checkpoint_list->b_cpprev; 599 jh->b_cpprev->b_cpnext = jh; 600 jh->b_cpnext->b_cpprev = jh; 601 } 602 transaction->t_checkpoint_list = jh; 603} 604 605/* 606 * We've finished with this transaction structure: adios... 607 * 608 * The transaction must have no links except for the checkpoint by this 609 * point. 610 * 611 * Called with the journal locked. 612 * Called with j_list_lock held. 613 */ 614 615void __hfsplus_jbd_drop_transaction(hfsplus_jbd_t *journal, hfsplus_transaction_t *transaction) 616{ 617 assert_spin_locked(&journal->j_list_lock); 618 if (transaction->t_cpnext) { 619 transaction->t_cpnext->t_cpprev = transaction->t_cpprev; 620 transaction->t_cpprev->t_cpnext = transaction->t_cpnext; 621 if (journal->j_checkpoint_transactions == transaction) 622 journal->j_checkpoint_transactions = 623 transaction->t_cpnext; 624 if (journal->j_checkpoint_transactions == transaction) 625 journal->j_checkpoint_transactions = NULL; 626 } 627 628 HFSPLUS_J_ASSERT(transaction->t_state == HFSPLUS_T_FINISHED); 629 HFSPLUS_J_ASSERT(transaction->t_buffers == NULL); 630 HFSPLUS_J_ASSERT(transaction->t_sync_datalist == NULL); 631 HFSPLUS_J_ASSERT(transaction->t_forget == NULL); 632 HFSPLUS_J_ASSERT(transaction->t_iobuf_list == NULL); 633 HFSPLUS_J_ASSERT(transaction->t_shadow_list == NULL); 634 HFSPLUS_J_ASSERT(transaction->t_log_list == NULL); 635 HFSPLUS_J_ASSERT(transaction->t_checkpoint_list == NULL); 636 HFSPLUS_J_ASSERT(transaction->t_updates == 0); 637 HFSPLUS_J_ASSERT(journal->j_committing_transaction != transaction); 638 HFSPLUS_J_ASSERT(journal->j_running_transaction != transaction); 639 640 dprint(DBG_JCHKPT, "Dropping transaction %d, all done\n", transaction->t_tid); 641 kfree(transaction); 642} 643