1/* 2 * linux/fs/commit.c 3 * 4 * Written by Stephen C. Tweedie <sct@redhat.com>, 1998 5 * 6 * Copyright 1998 Red Hat corp --- All Rights Reserved 7 * 8 * This file is part of the Linux kernel and is made available under 9 * the terms of the GNU General Public License, version 2, or at your 10 * option, any later version, incorporated herein by reference. 11 * 12 * Journal commit routines for the generic filesystem journaling code; 13 * part of the ext2fs journaling system. 14 */ 15 16#include <linux/time.h> 17#include <linux/fs.h> 18#include <linux/errno.h> 19#include <linux/slab.h> 20#include <linux/mm.h> 21#include <linux/pagemap.h> 22#include <linux/smp_lock.h> 23#include "hfsplus_jbd.h" 24#include "hfsplus_fs.h" 25 26/* 27 * Default IO end handler for temporary HFSPLUS_BJ_IO buffer_heads. 28 */ 29static void hfsplus_jbd_end_buffer_io_sync(struct buffer_head *bh, int uptodate) 30{ 31 HFSPLUS_BUFFER_TRACE(bh, ""); 32 if (uptodate) 33 set_buffer_uptodate(bh); 34 else 35 clear_buffer_uptodate(bh); 36 unlock_buffer(bh); 37} 38 39/* 40 * When an ext3-ordered file is truncated, it is possible that many pages are 41 * not sucessfully freed, because they are attached to a committing transaction. 42 * After the transaction commits, these pages are left on the LRU, with no 43 * ->mapping, and with attached buffers. These pages are trivially reclaimable 44 * by the VM, but their apparent absence upsets the VM accounting, and it makes 45 * the numbers in /proc/meminfo look odd. 46 * 47 * So here, we have a buffer which has just come off the forget list. Look to 48 * see if we can strip all buffers from the backing page. 49 * 50 * Called under lock_journal(), and possibly under hfsplus_jbd_datalist_lock. The 51 * caller provided us with a ref against the buffer, and we drop that here. 
52 */ 53static void release_buffer_page(struct buffer_head *bh) 54{ 55 struct page *page; 56 57 if (buffer_dirty(bh)) 58 goto nope; 59 if (atomic_read(&bh->b_count) != 1) 60 goto nope; 61 page = bh->b_page; 62 if (!page) 63 goto nope; 64 if (page->mapping) 65 goto nope; 66 67 /* OK, it's a truncated page */ 68 if (TestSetPageLocked(page)) 69 goto nope; 70 71 page_cache_get(page); 72 __brelse(bh); 73 try_to_free_buffers(page); 74 unlock_page(page); 75 page_cache_release(page); 76 return; 77 78nope: 79 __brelse(bh); 80} 81 82/* 83 * Try to acquire hfsplus_jbd_lock_bh_state() against the buffer, when j_list_lock is 84 * held. For ranking reasons we must trylock. If we lose, schedule away and 85 * return 0. j_list_lock is dropped in this case. 86 */ 87static int inverted_lock(hfsplus_jbd_t *journal, struct buffer_head *bh) 88{ 89 if (!hfsplus_jbd_trylock_bh_state(bh)) { 90 spin_unlock(&journal->j_list_lock); 91 schedule(); 92 return 0; 93 } 94 return 1; 95} 96 97/* Done it all: now write the commit record. We should have 98 * cleaned up our previous buffers by now, so if we are in abort 99 * mode we can now just skip the rest of the journal write 100 * entirely. 101 * 102 * Returns 1 if the journal needs to be aborted or 0 on success 103 */ 104static int hfsplus_jbd_write_commit_record(hfsplus_jbd_t *journal, 105 hfsplus_transaction_t *commit_transaction) 106{ 107 struct hfsplus_jbd_head *descriptor; 108 struct buffer_head *bh; 109 int i, ret; 110 int barrier_done = 0; 111 112#ifdef HFSPLUS_JOURNAL_MAC_COMPATIBLE 113 dprint(DBG_JCOMMIT, "Skip writing commit block into the disk\n"); 114 return 0; 115#endif 116 117 if (is_hfsplus_jbd_aborted(journal)) 118 return 0; 119 120 descriptor = hfsplus_jbd_get_descriptor_buffer(journal); 121 if (!descriptor) 122 return 1; 123 124 bh = hfsplus_jh2bh(descriptor); 125 126 /* AKPM: buglet - add `i' to tmp! 
*/ 127 for (i = 0; i < bh->b_size; i += 512) { 128 hfsplus_jbd_header_t *tmp = (hfsplus_jbd_header_t*)bh->b_data; 129 tmp->h_magic = cpu_to_be32(JFS_MAGIC_NUMBER); 130 tmp->h_blocktype = cpu_to_be32(JFS_COMMIT_BLOCK); 131 tmp->h_sequence = cpu_to_be32(commit_transaction->t_tid); 132 } 133 134 HFSPLUS_JBUFFER_TRACE(descriptor, "write commit block"); 135 set_buffer_dirty(bh); 136 if (journal->j_flags & JFS_BARRIER) { 137 set_buffer_ordered(bh); 138 barrier_done = 1; 139 } 140 ret = sync_dirty_buffer(bh); 141 /* is it possible for another commit to fail at roughly 142 * the same time as this one? If so, we don't want to 143 * trust the barrier flag in the super, but instead want 144 * to remember if we sent a barrier request 145 */ 146 if (ret == -EOPNOTSUPP && barrier_done) { 147 char b[BDEVNAME_SIZE]; 148 149 printk(KERN_WARNING 150 "JBD: barrier-based sync failed on %s - " 151 "disabling barriers\n", 152 bdevname(journal->j_dev, b)); 153 spin_lock(&journal->j_state_lock); 154 journal->j_flags &= ~JFS_BARRIER; 155 spin_unlock(&journal->j_state_lock); 156 157 /* And try again, without the barrier */ 158 clear_buffer_ordered(bh); 159 set_buffer_uptodate(bh); 160 set_buffer_dirty(bh); 161 ret = sync_dirty_buffer(bh); 162 } 163 put_bh(bh); /* One for getblk() */ 164 hfsplus_jbd_put_journal_head(descriptor); 165 166 return (ret == -EIO); 167} 168 169#ifdef HFSPLUS_JOURNAL_MAC_COMPATIBLE 170static void hfsplus_journaled_swap_blhdr(hfsplus_blhdr_t *blhdr) 171{ 172 int i; 173 174 blhdr->bytes_used = cpu_to_be32(blhdr->bytes_used); 175 blhdr->checksum = cpu_to_be32(blhdr->checksum); 176 177 for (i=1; i<blhdr->num_blocks; i++) { 178 blhdr->binfo[i].bnum = cpu_to_be64(blhdr->binfo[i].bnum); 179 blhdr->binfo[i].bsize = cpu_to_be32(blhdr->binfo[i].bsize); 180 } 181 182 blhdr->num_blocks = cpu_to_be16(blhdr->num_blocks); 183} 184#endif 185 186/* 187 * hfsplus_jbd_commit_transaction 188 * 189 * The primary function for committing a transaction to the log. 
 * This function is called by the journal thread to begin a complete
 * commit.
 */
void hfsplus_jbd_commit_transaction(hfsplus_jbd_t *journal)
{
	hfsplus_transaction_t *commit_transaction;
	struct hfsplus_jbd_head *jh, *new_jh, *descriptor;
	struct buffer_head **wbuf = journal->j_wbuf;
	int bufs;
	int flags;
	int err;
	unsigned long blocknr;
#ifdef HFSPLUS_JOURNAL_MAC_COMPATIBLE
	hfsplus_blhdr_t *blhdr = NULL;
	struct super_block *sb = NULL;
	struct hfsplus_journal *jnl = NULL;
#else
	char *tagp = NULL;
	hfsplus_jbd_header_t *header;
	hfsplus_jbd_block_tag_t *tag = NULL;
	int space_left = 0;
	int first_tag = 0;
	int tag_flag;
#endif
	int i;

	/*
	 * First job: lock down the current transaction and wait for
	 * all outstanding updates to complete.
	 */

#ifdef COMMIT_STATS
	spin_lock(&journal->j_list_lock);
	summarise_hfsplus_jbd_usage(journal);
	spin_unlock(&journal->j_list_lock);
#endif

	/* Do we need to erase the effects of a prior hfsplus_jbd_flush? */
	if (journal->j_flags & JFS_FLUSHED) {
		dprint(DBG_JCOMMIT, "super block updated\n");
		hfsplus_jbd_update_superblock(journal, 1);
	} else {
		dprint(DBG_JCOMMIT, "superblock not updated\n");
	}

	HFSPLUS_J_ASSERT(journal->j_running_transaction != NULL);
	HFSPLUS_J_ASSERT(journal->j_committing_transaction == NULL);

	commit_transaction = journal->j_running_transaction;
	HFSPLUS_J_ASSERT(commit_transaction->t_state == HFSPLUS_T_RUNNING);

	dprint(DBG_JCOMMIT, "JBD: starting commit of transaction %d\n", commit_transaction->t_tid);

	spin_lock(&journal->j_state_lock);
	commit_transaction->t_state = HFSPLUS_T_LOCKED;

	/*
	 * Wait for all handles attached to this transaction to finish.
	 * Both locks are dropped around schedule(); the outer while loop
	 * rechecks t_updates after we retake them.
	 */
	spin_lock(&commit_transaction->t_handle_lock);
	while (commit_transaction->t_updates) {
		DEFINE_WAIT(wait);

		prepare_to_wait(&journal->j_wait_updates, &wait,
				TASK_UNINTERRUPTIBLE);
		if (commit_transaction->t_updates) {
			spin_unlock(&commit_transaction->t_handle_lock);
			spin_unlock(&journal->j_state_lock);
			schedule();
			spin_lock(&journal->j_state_lock);
			spin_lock(&commit_transaction->t_handle_lock);
		}
		finish_wait(&journal->j_wait_updates, &wait);
	}
	spin_unlock(&commit_transaction->t_handle_lock);

	HFSPLUS_J_ASSERT (commit_transaction->t_outstanding_credits <=
			journal->j_max_transaction_buffers);

	/*
	 * First thing we are allowed to do is to discard any remaining
	 * HFSPLUS_BJ_Reserved buffers.  Note, it is _not_ permissible to assume
	 * that there are no such buffers: if a large filesystem
	 * operation like a truncate needs to split itself over multiple
	 * transactions, then it may try to do a hfsplus_jbd_restart() while
	 * there are still HFSPLUS_BJ_Reserved buffers outstanding.  These must
	 * be released cleanly from the current transaction.
	 *
	 * In this case, the filesystem must still reserve write access
	 * again before modifying the buffer in the new transaction, but
	 * we do not require it to remember exactly which old buffers it
	 * has reserved.  This is consistent with the existing behaviour
	 * that multiple hfsplus_jbd_get_write_access() calls to the same
	 * buffer are perfectly permissable.
	 */
	while (commit_transaction->t_reserved_list) {
		jh = commit_transaction->t_reserved_list;
		HFSPLUS_JBUFFER_TRACE(jh, "reserved, unused: refile");
		/*
		 * A hfsplus_jbd_get_undo_access()+hfsplus_jbd_release_buffer() may
		 * leave undo-committed data.
		 */
		if (jh->b_committed_data) {
			struct buffer_head *bh = hfsplus_jh2bh(jh);

			hfsplus_jbd_lock_bh_state(bh);
			kfree(jh->b_committed_data);
			jh->b_committed_data = NULL;
			hfsplus_jbd_unlock_bh_state(bh);
		}
		hfsplus_jbd_refile_buffer(journal, jh);
	}

	/*
	 * Now try to drop any written-back buffers from the journal's
	 * checkpoint lists.  We do this *before* commit because it potentially
	 * frees some memory
	 */
	spin_lock(&journal->j_list_lock);
	__hfsplus_jbd_clean_checkpoint_list(journal);
	spin_unlock(&journal->j_list_lock);

	dprint(DBG_JCOMMIT, "JBD: commit phase 1\n");

	/*
	 * Switch to a new revoke table.
	 */
	hfsplus_jbd_switch_revoke_table(journal);

	/*
	 * Transition to FLUSH: from here on, this transaction is the
	 * committing one and new handles go to a fresh running transaction.
	 */
	commit_transaction->t_state = HFSPLUS_T_FLUSH;
	journal->j_committing_transaction = commit_transaction;
	journal->j_running_transaction = NULL;
	commit_transaction->t_log_start = journal->j_head;
	wake_up(&journal->j_wait_transaction_locked);
	spin_unlock(&journal->j_state_lock);

	dprint(DBG_JCOMMIT, "JBD: commit phase 2\n");

	/*
	 * First, drop modified flag: all accesses to the buffers
	 * will be tracked for a new trasaction only -bzzz
	 */
	spin_lock(&journal->j_list_lock);
	if (commit_transaction->t_buffers) {
		new_jh = jh = commit_transaction->t_buffers->b_tnext;
		do {
			HFSPLUS_J_ASSERT_JH(new_jh, new_jh->b_modified == 1 ||
					new_jh->b_modified == 0);
			new_jh->b_modified = 0;
			new_jh = new_jh->b_tnext;
		} while (new_jh != jh);
	}
	spin_unlock(&journal->j_list_lock);

	/*
	 * Now start flushing things to disk, in the order they appear
	 * on the transaction lists.  Data blocks go first.
	 */

	err = 0;
	/*
	 * Whenever we unlock the journal and sleep, things can get added
	 * onto ->t_sync_datalist, so we have to keep looping back to
	 * write_out_data until we *know* that the list is empty.
	 */
	bufs = 0;
	/*
	 * Cleanup any flushed data buffers from the data list.  Even in
	 * abort mode, we want to flush this out as soon as possible.
	 */
write_out_data:
	cond_resched();
	spin_lock(&journal->j_list_lock);

	while (commit_transaction->t_sync_datalist) {
		struct buffer_head *bh;

		jh = commit_transaction->t_sync_datalist;
		commit_transaction->t_sync_datalist = jh->b_tnext;
		bh = hfsplus_jh2bh(jh);
		if (buffer_locked(bh)) {
			/* Write already in flight: park it on BJ_Locked so the
			 * wait loop below picks it up. */
			HFSPLUS_BUFFER_TRACE(bh, "locked");
			if (!inverted_lock(journal, bh))
				goto write_out_data;
			__hfsplus_jbd_temp_unlink_buffer(jh);
			__hfsplus_jbd_file_buffer(jh, commit_transaction,
						HFSPLUS_BJ_Locked);
			hfsplus_jbd_unlock_bh_state(bh);
			if (lock_need_resched(&journal->j_list_lock)) {
				spin_unlock(&journal->j_list_lock);
				goto write_out_data;
			}
		} else {
			if (buffer_dirty(bh)) {
				/* Batch dirty data buffers into wbuf[] and
				 * submit when the array fills. */
				HFSPLUS_BUFFER_TRACE(bh, "start journal writeout");
				get_bh(bh);
				wbuf[bufs++] = bh;
				if (bufs == journal->j_wbufsize) {
					dprint(DBG_JCOMMIT, "submit %d writes\n", bufs);
					spin_unlock(&journal->j_list_lock);
					ll_rw_block(SWRITE, bufs, wbuf);
					hfsplus_jbd_brelse_array(wbuf, bufs);
					bufs = 0;
					goto write_out_data;
				}
			} else {
				/* Clean and unlocked: nothing left to do for
				 * this buffer; detach it from the journal. */
				HFSPLUS_BUFFER_TRACE(bh, "writeout complete: unfile");
				if (!inverted_lock(journal, bh))
					goto write_out_data;
				__hfsplus_jbd_unfile_buffer(jh);
				hfsplus_jbd_unlock_bh_state(bh);
				hfsplus_jbd_remove_journal_head(bh);
				put_bh(bh);
				if (lock_need_resched(&journal->j_list_lock)) {
					spin_unlock(&journal->j_list_lock);
					goto write_out_data;
				}
			}
		}
	}

	/* Submit any writes still batched in wbuf[]. */
	if (bufs) {
		spin_unlock(&journal->j_list_lock);
		ll_rw_block(SWRITE, bufs, wbuf);
		hfsplus_jbd_brelse_array(wbuf, bufs);
		spin_lock(&journal->j_list_lock);
	}

	/*
	 * Wait for all previously submitted IO to complete.
	 */
	while (commit_transaction->t_locked_list) {
		struct buffer_head *bh;

		jh = commit_transaction->t_locked_list->b_tprev;
		bh = hfsplus_jh2bh(jh);
		get_bh(bh);
		if (buffer_locked(bh)) {
			spin_unlock(&journal->j_list_lock);
			wait_on_buffer(bh);
			if (unlikely(!buffer_uptodate(bh)))
				err = -EIO;
			spin_lock(&journal->j_list_lock);
		}
		if (!inverted_lock(journal, bh)) {
			put_bh(bh);
			spin_lock(&journal->j_list_lock);
			continue;
		}
		/* Recheck under bh-state lock: the buffer may have been
		 * refiled while we slept waiting for its I/O. */
		if (buffer_hfsplus_jbd(bh) && jh->b_jlist == HFSPLUS_BJ_Locked) {
			__hfsplus_jbd_unfile_buffer(jh);
			hfsplus_jbd_unlock_bh_state(bh);
			hfsplus_jbd_remove_journal_head(bh);
			put_bh(bh);
		} else {
			hfsplus_jbd_unlock_bh_state(bh);
		}
		put_bh(bh);
		cond_resched_lock(&journal->j_list_lock);
	}
	spin_unlock(&journal->j_list_lock);

	if (err)
		__hfsplus_jbd_abort_hard(journal);

	hfsplus_jbd_write_revoke_records(journal, commit_transaction);

	/* NOTE(review): this dprint repeats "phase 2"; phase numbering in
	 * these messages is off by one from here on. */
	dprint(DBG_JCOMMIT, "JBD: commit phase 2\n");

	/*
	 * If we found any dirty or locked buffers, then we should have
	 * looped back up to the write_out_data label.  If there weren't
	 * any then hfsplus_jbd_clean_data_list should have wiped the list
	 * clean by now, so check that it is in fact empty.
	 */
	HFSPLUS_J_ASSERT (commit_transaction->t_sync_datalist == NULL);

	dprint(DBG_JCOMMIT, "JBD: commit phase 3\n");

	/*
	 * Way to go: we have now written out all of the data for a
	 * transaction!  Now comes the tricky part: we need to write out
	 * metadata.  Loop over the transaction's entire buffer list:
	 */
	commit_transaction->t_state = HFSPLUS_T_COMMIT;

	descriptor = NULL;
#ifdef HFSPLUS_JOURNAL_MAC_COMPATIBLE
	sb = (struct super_block *)journal->j_private;
	jnl = &(HFSPLUS_SB(sb).jnl);
#endif
	bufs = 0;
	while (commit_transaction->t_buffers) {

		/* Find the next buffer to be journaled... */

		jh = commit_transaction->t_buffers;

		/* If we're in abort mode, we just un-journal the buffer and
		   release it for background writing. */

		if (is_hfsplus_jbd_aborted(journal)) {
			HFSPLUS_JBUFFER_TRACE(jh, "journal is aborting: refile");
			hfsplus_jbd_refile_buffer(journal, jh);
			/* If that was the last one, we need to clean up
			 * any descriptor buffers which may have been
			 * already allocated, even if we are now
			 * aborting. */
			if (!commit_transaction->t_buffers)
				goto start_hfsplus_jbd_io;
			continue;
		}

		/* Make sure we have a descriptor block in which to
		   record the metadata buffer. */

		if (!descriptor) {
			struct buffer_head *bh;

			HFSPLUS_J_ASSERT (bufs == 0);

			dprint(DBG_JCOMMIT, "JBD: get descriptor\n");

			descriptor = hfsplus_jbd_get_descriptor_buffer(journal);
			if (!descriptor) {
				__hfsplus_jbd_abort_hard(journal);
				continue;
			}

			bh = hfsplus_jh2bh(descriptor);
			dprint(DBG_JCOMMIT, "JBD: got buffer %llu (%p)\n", (unsigned long long)bh->b_blocknr, bh->b_data);
#ifdef HFSPLUS_JOURNAL_MAC_COMPATIBLE
			/* Populate block list header */
			blhdr = (hfsplus_blhdr_t *)bh->b_data;
			blhdr->max_blocks = (jnl->jhdr->blhdr_size / sizeof(struct hfsplus_block_info)) - 1;
			blhdr->num_blocks = 1; /* One is for header */
			blhdr->bytes_used = jnl->jhdr->blhdr_size;
			blhdr->binfo[0].next = 0; /* Only one Mac transaction */
			hfsplus_journal_header_end_update(journal, jnl->jhdr);
#else
			/* JBD-style descriptor: header followed by block tags. */
			header = (hfsplus_jbd_header_t *)&bh->b_data[0];
			header->h_magic = cpu_to_be32(JFS_MAGIC_NUMBER);
			header->h_blocktype = cpu_to_be32(JFS_DESCRIPTOR_BLOCK);
			header->h_sequence = cpu_to_be32(commit_transaction->t_tid);

			tagp = &bh->b_data[sizeof(hfsplus_jbd_header_t)];
			space_left = bh->b_size - sizeof(hfsplus_jbd_header_t);
			first_tag = 1;
#endif
			set_buffer_hfsplus_jbd_jwrite(bh);
			set_buffer_dirty(bh);
			wbuf[bufs++] = bh;

			/* Record it so that we can wait for IO
			   completion later */
			HFSPLUS_BUFFER_TRACE(bh, "ph3: file as descriptor");
			hfsplus_jbd_file_buffer(descriptor, commit_transaction,
					HFSPLUS_BJ_LogCtl);
		}

		/* Where is the buffer to be written? */

		err = hfsplus_jbd_next_log_block(journal, &blocknr);
		/* If the block mapping failed, just abandon the buffer
		   and repeat this loop: we'll fall into the
		   refile-on-abort condition above. */
		if (err) {
			__hfsplus_jbd_abort_hard(journal);
			continue;
		}

		/*
		 * start_this_handle() uses t_outstanding_credits to determine
		 * the free space in the log, but this counter is changed
		 * by hfsplus_jbd_next_log_block() also.
		 */
		commit_transaction->t_outstanding_credits--;

		/* Bump b_count to prevent truncate from stumbling over
		   the shadowed buffer!  @@@ This can go if we ever get
		   rid of the HFSPLUS_BJ_IO/HFSPLUS_BJ_Shadow pairing of buffers. */
		atomic_inc(&hfsplus_jh2bh(jh)->b_count);

		/* Make a temporary IO buffer with which to write it out
		   (this will requeue both the metadata buffer and the
		   temporary IO buffer). new_bh goes on HFSPLUS_BJ_IO*/

		set_bit(BH_HFSPLUS_JWrite, &hfsplus_jh2bh(jh)->b_state);
		/*
		 * akpm: hfsplus_jbd_write_metadata_buffer() sets
		 * new_bh->b_transaction to commit_transaction.
		 * We need to clean this up before we release new_bh
		 * (which is of type HFSPLUS_BJ_IO)
		 */
		HFSPLUS_JBUFFER_TRACE(jh, "ph3: write metadata");
		flags = hfsplus_jbd_write_metadata_buffer(commit_transaction,
							jh, &new_jh, blocknr);
		set_bit(BH_HFSPLUS_JWrite, &hfsplus_jh2bh(new_jh)->b_state);
#ifdef HFSPLUS_JOURNAL_MAC_COMPATIBLE
		/* Record this block's home location (in 512-byte sectors)
		 * in the Mac block list. */
		blhdr->binfo[bufs].bnum = (hfsplus_jh2bh(jh)->b_blocknr * sb->s_blocksize) >> HFSPLUS_SECTOR_SHIFT;
		blhdr->binfo[bufs].bsize = hfsplus_jh2bh(jh)->b_size;
		blhdr->binfo[bufs].next = 0;
		blhdr->bytes_used += blhdr->binfo[bufs].bsize;
		blhdr->num_blocks++;
		hfsplus_journal_header_end_update(journal, jnl->jhdr);
#endif
		wbuf[bufs++] = hfsplus_jh2bh(new_jh);

#ifndef HFSPLUS_JOURNAL_MAC_COMPATIBLE
		/* Record the new block's tag in the current descriptor
		   buffer */
		tag_flag = 0;
		if (flags & 1)
			tag_flag |= JFS_FLAG_ESCAPE;
		if (!first_tag)
			tag_flag |= JFS_FLAG_SAME_UUID;

		tag = (hfsplus_jbd_block_tag_t *) tagp;
		tag->t_blocknr = cpu_to_be32(hfsplus_jh2bh(jh)->b_blocknr);
		tag->t_flags = cpu_to_be32(tag_flag);
		tagp += sizeof(hfsplus_jbd_block_tag_t);
		space_left -= sizeof(hfsplus_jbd_block_tag_t);

		if (first_tag) {
			memcpy (tagp, journal->j_uuid, 16);
			tagp += 16;
			space_left -= 16;
			first_tag = 0;
		}
#endif

		/* If there's no more to do, or if the descriptor is full,
		   let the IO rip! */

#ifdef HFSPLUS_JOURNAL_MAC_COMPATIBLE
		if (bufs == journal->j_wbufsize ||
		    commit_transaction->t_buffers == NULL ||
		    bufs == blhdr->max_blocks)
#else
		if (bufs == journal->j_wbufsize ||
		    commit_transaction->t_buffers == NULL ||
		    space_left < sizeof(hfsplus_jbd_block_tag_t) + 16)
#endif
		{

#ifdef HFSPLUS_JOURNAL_MAC_COMPATIBLE
			dprint(DBG_JCOMMIT, "start: %llx, end: %llx, num_blocks: %#x, bytes_used: %#x, j_head: %#lx, j_first: %#lx\n", jnl->jhdr->start, jnl->jhdr->end, blhdr->num_blocks, blhdr->bytes_used, journal->j_head, journal->j_first);
			blhdr->max_blocks = HFSPLUS_JBD_MAGIC_NUMBER;
			if (jnl->flags == HFSPLUS_JOURNAL_SWAP)
				hfsplus_journaled_swap_blhdr(blhdr);
#endif

			dprint(DBG_JCOMMIT, "JBD: Submit %d IOs\n", bufs);

			/* Write an end-of-descriptor marker before
			   submitting the IOs.  "tag" still points to
			   the last tag we set up. */

#ifndef HFSPLUS_JOURNAL_MAC_COMPATIBLE
			tag->t_flags |= cpu_to_be32(JFS_FLAG_LAST_TAG);
#endif

start_hfsplus_jbd_io:
			for (i = 0; i < bufs; i++) {
				struct buffer_head *bh = wbuf[i];
				lock_buffer(bh);
				clear_buffer_dirty(bh);
				set_buffer_uptodate(bh);
				bh->b_end_io = hfsplus_jbd_end_buffer_io_sync;
				submit_bh(WRITE, bh);
			}
#ifdef HFSPLUS_JOURNAL_MAC_COMPATIBLE
			//hfsplus_test_block_list_header(__FUNCTION__, jnl->jhdr, jnl);
#endif
			cond_resched();

			/* Force a new descriptor to be generated next
			   time round the loop. */
			descriptor = NULL;
			bufs = 0;
		}
	}

	/* Lo and behold: we have just managed to send a transaction to
	   the log.  Before we can commit it, wait for the IO so far to
	   complete.  Control buffers being written are on the
	   transaction's t_log_list queue, and metadata buffers are on
	   the t_iobuf_list queue.

	   Wait for the buffers in reverse order.  That way we are
	   less likely to be woken up until all IOs have completed, and
	   so we incur less scheduling load.
	 */

	dprint(DBG_JCOMMIT, "JBD: commit phase 4\n");

	/*
	 * akpm: these are HFSPLUS_BJ_IO, and j_list_lock is not needed.
	 * See __hfsplus_jbd_try_to_free_buffer.
	 */
wait_for_iobuf:
	while (commit_transaction->t_iobuf_list != NULL) {
		struct buffer_head *bh;

		jh = commit_transaction->t_iobuf_list->b_tprev;
		bh = hfsplus_jh2bh(jh);
		if (buffer_locked(bh)) {
			wait_on_buffer(bh);
			goto wait_for_iobuf;
		}
		if (cond_resched())
			goto wait_for_iobuf;

		if (unlikely(!buffer_uptodate(bh)))
			err = -EIO;

		clear_buffer_hfsplus_jbd_jwrite(bh);

		HFSPLUS_JBUFFER_TRACE(jh, "ph4: unfile after journal write");
		hfsplus_jbd_unfile_buffer(journal, jh);

		/*
		 * ->t_iobuf_list should contain only dummy buffer_heads
		 * which were created by hfsplus_jbd_write_metadata_buffer().
		 */
		HFSPLUS_BUFFER_TRACE(bh, "dumping temporary bh");
		hfsplus_jbd_put_journal_head(jh);
		__brelse(bh);
		HFSPLUS_J_ASSERT_BH(bh, atomic_read(&bh->b_count) == 0);
		free_buffer_head(bh);

		/* We also have to unlock and free the corresponding
		   shadowed buffer */
		jh = commit_transaction->t_shadow_list->b_tprev;
		bh = hfsplus_jh2bh(jh);
		clear_bit(BH_HFSPLUS_JWrite, &bh->b_state);
		HFSPLUS_J_ASSERT_BH(bh, buffer_hfsplus_jbddirty(bh));

		/* The metadata is now released for reuse, but we need
                   to remember it against this transaction so that when
                   we finally commit, we can do any checkpointing
                   required. */
		HFSPLUS_JBUFFER_TRACE(jh, "file as HFSPLUS_BJ_Forget");
		hfsplus_jbd_file_buffer(jh, commit_transaction, HFSPLUS_BJ_Forget);
		/* Wake up any transactions which were waiting for this
		   IO to complete */
		wake_up_bit(&bh->b_state, BH_HFSPLUS_Unshadow);
		HFSPLUS_JBUFFER_TRACE(jh, "brelse shadowed buffer");
		__brelse(bh);
	}

	HFSPLUS_J_ASSERT (commit_transaction->t_shadow_list == NULL);

	dprint(DBG_JCOMMIT, "JBD: commit phase 5\n");

	/* Here we wait for the revoke record and descriptor record buffers */
 wait_for_ctlbuf:
	while (commit_transaction->t_log_list != NULL) {
		struct buffer_head *bh;

		jh = commit_transaction->t_log_list->b_tprev;
		bh = hfsplus_jh2bh(jh);
		if (buffer_locked(bh)) {
			wait_on_buffer(bh);
			goto wait_for_ctlbuf;
		}
		if (cond_resched())
			goto wait_for_ctlbuf;

		if (unlikely(!buffer_uptodate(bh)))
			err = -EIO;

		HFSPLUS_BUFFER_TRACE(bh, "ph5: control buffer writeout done: unfile");
		clear_buffer_hfsplus_jbd_jwrite(bh);
		hfsplus_jbd_unfile_buffer(journal, jh);
		hfsplus_jbd_put_journal_head(jh);
		__brelse(bh);		/* One for getblk */
		/* AKPM: bforget here */
	}

	dprint(DBG_JCOMMIT, "JBD: commit phase 6\n");

	if (hfsplus_jbd_write_commit_record(journal, commit_transaction))
		err = -EIO;

	if (err)
		__hfsplus_jbd_abort_hard(journal);

	/* End of a transaction!  Finally, we can do checkpoint
           processing: any buffers committed as a result of this
           transaction can be removed from any checkpoint list it was on
           before. */

	dprint(DBG_JCOMMIT, "JBD: commit phase 7\n");

	HFSPLUS_J_ASSERT(commit_transaction->t_sync_datalist == NULL);
	HFSPLUS_J_ASSERT(commit_transaction->t_buffers == NULL);
	HFSPLUS_J_ASSERT(commit_transaction->t_checkpoint_list == NULL);
	HFSPLUS_J_ASSERT(commit_transaction->t_iobuf_list == NULL);
	HFSPLUS_J_ASSERT(commit_transaction->t_shadow_list == NULL);
	HFSPLUS_J_ASSERT(commit_transaction->t_log_list == NULL);

restart_loop:
	/*
	 * As there are other places (hfsplus_jbd_unmap_buffer()) adding buffers
	 * to this list we have to be careful and hold the j_list_lock.
	 */
	spin_lock(&journal->j_list_lock);
	while (commit_transaction->t_forget) {
		hfsplus_transaction_t *cp_transaction;
		struct buffer_head *bh;

		jh = commit_transaction->t_forget;
		spin_unlock(&journal->j_list_lock);
		bh = hfsplus_jh2bh(jh);
		hfsplus_jbd_lock_bh_state(bh);
		HFSPLUS_J_ASSERT_JH(jh,	jh->b_transaction == commit_transaction ||
			jh->b_transaction == journal->j_running_transaction);

		/*
		 * If there is undo-protected committed data against
		 * this buffer, then we can remove it now.  If it is a
		 * buffer needing such protection, the old frozen_data
		 * field now points to a committed version of the
		 * buffer, so rotate that field to the new committed
		 * data.
		 *
		 * Otherwise, we can just throw away the frozen data now.
		 */
		if (jh->b_committed_data) {
			kfree(jh->b_committed_data);
			jh->b_committed_data = NULL;
			if (jh->b_frozen_data) {
				jh->b_committed_data = jh->b_frozen_data;
				jh->b_frozen_data = NULL;
			}
		} else if (jh->b_frozen_data) {
			kfree(jh->b_frozen_data);
			jh->b_frozen_data = NULL;
		}

		spin_lock(&journal->j_list_lock);
		cp_transaction = jh->b_cp_transaction;
		if (cp_transaction) {
			HFSPLUS_JBUFFER_TRACE(jh, "remove from old cp transaction");
			__hfsplus_jbd_remove_checkpoint(jh);
		}

		/* Only re-checkpoint the buffer_head if it is marked
		 * dirty.  If the buffer was added to the HFSPLUS_BJ_Forget list
		 * by hfsplus_jbd_forget, it may no longer be dirty and
		 * there's no point in keeping a checkpoint record for
		 * it. */

		/* A buffer which has been freed while still being
		 * journaled by a previous transaction may end up still
		 * being dirty here, but we want to avoid writing back
		 * that buffer in the future now that the last use has
		 * been committed.  That's not only a performance gain,
		 * it also stops aliasing problems if the buffer is left
		 * behind for writeback and gets reallocated for another
		 * use in a different page. */
		if (buffer_hfsplus_jbd_freed(bh)) {
			clear_buffer_hfsplus_jbd_freed(bh);
			clear_buffer_hfsplus_jbddirty(bh);
		}

		if (buffer_hfsplus_jbddirty(bh)) {
			HFSPLUS_JBUFFER_TRACE(jh, "add to new checkpointing trans");
			__hfsplus_jbd_insert_checkpoint(jh, commit_transaction);
			HFSPLUS_JBUFFER_TRACE(jh, "refile for checkpoint writeback");
			__hfsplus_jbd_refile_buffer(jh);
			hfsplus_jbd_unlock_bh_state(bh);
		} else {
			HFSPLUS_J_ASSERT_BH(bh, !buffer_dirty(bh));
			HFSPLUS_J_ASSERT_JH(jh, jh->b_next_transaction == NULL);
			__hfsplus_jbd_unfile_buffer(jh);
			hfsplus_jbd_unlock_bh_state(bh);
			hfsplus_jbd_remove_journal_head(bh);  /* needs a brelse */
			release_buffer_page(bh);
		}
		cond_resched_lock(&journal->j_list_lock);
	}
	spin_unlock(&journal->j_list_lock);
	/*
	 * This is a bit sleazy.  We borrow j_list_lock to protect
	 * journal->j_committing_transaction in __hfsplus_jbd_remove_checkpoint.
	 * Really, __hfsplus_jbd_remove_checkpoint should be using j_state_lock but
	 * it's a bit hassle to hold that across __hfsplus_jbd_remove_checkpoint
	 */
	spin_lock(&journal->j_state_lock);
	spin_lock(&journal->j_list_lock);
	/*
	 * Now recheck if some buffers did not get attached to the transaction
	 * while the lock was dropped...
	 */
	if (commit_transaction->t_forget) {
		spin_unlock(&journal->j_list_lock);
		spin_unlock(&journal->j_state_lock);
		goto restart_loop;
	}

	/* Done with this transaction! */

	dprint(DBG_JCOMMIT, "JBD: commit phase 8\n");

	HFSPLUS_J_ASSERT(commit_transaction->t_state == HFSPLUS_T_COMMIT);

	commit_transaction->t_state = HFSPLUS_T_FINISHED;
	HFSPLUS_J_ASSERT(commit_transaction == journal->j_committing_transaction);
	journal->j_commit_sequence = commit_transaction->t_tid;
	journal->j_committing_transaction = NULL;
	spin_unlock(&journal->j_state_lock);

	/* Either drop the transaction now (nothing left to checkpoint) or
	 * splice it onto the journal's circular checkpoint list. */
	if (commit_transaction->t_checkpoint_list == NULL) {
		__hfsplus_jbd_drop_transaction(journal, commit_transaction);
	} else {
		if (journal->j_checkpoint_transactions == NULL) {
			journal->j_checkpoint_transactions = commit_transaction;
			commit_transaction->t_cpnext = commit_transaction;
			commit_transaction->t_cpprev = commit_transaction;
		} else {
			commit_transaction->t_cpnext =
				journal->j_checkpoint_transactions;
			commit_transaction->t_cpprev =
				commit_transaction->t_cpnext->t_cpprev;
			commit_transaction->t_cpnext->t_cpprev =
				commit_transaction;
			commit_transaction->t_cpprev->t_cpnext =
				commit_transaction;
		}
	}
	spin_unlock(&journal->j_list_lock);

	dprint(DBG_JCOMMIT, "JBD: commit %d complete, head %d\n", journal->j_commit_sequence, journal->j_tail_sequence);

	wake_up(&journal->j_wait_done_commit);
}