/*
 * linux/fs/commit.c
 *
 * Written by Stephen C. Tweedie <sct@redhat.com>, 1998
 *
 * Copyright 1998 Red Hat corp --- All Rights Reserved
 *
 * This file is part of the Linux kernel and is made available under
 * the terms of the GNU General Public License, version 2, or at your
 * option, any later version, incorporated herein by reference.
 *
 * Journal commit routines for the generic filesystem journaling code;
 * part of the ext2fs journaling system.
 */

#include <linux/time.h>
#include <linux/fs.h>
#include <linux/errno.h>
#include <linux/slab.h>
#include <linux/mm.h>
#include <linux/pagemap.h>
#include <linux/smp_lock.h>
#include "hfsplus_jbd.h"
#include "hfsplus_fs.h"

/*
 * Default IO end handler for temporary HFSPLUS_BJ_IO buffer_heads.
 */
static void hfsplus_jbd_end_buffer_io_sync(struct buffer_head *bh, int uptodate)
{
	HFSPLUS_BUFFER_TRACE(bh, "");
	if (uptodate)
		set_buffer_uptodate(bh);
	else
		clear_buffer_uptodate(bh);
	unlock_buffer(bh);
}

/*
 * When an ext3-ordered file is truncated, it is possible that many pages are
 * not successfully freed, because they are attached to a committing transaction.
 * After the transaction commits, these pages are left on the LRU, with no
 * ->mapping, and with attached buffers.  These pages are trivially reclaimable
 * by the VM, but their apparent absence upsets the VM accounting, and it makes
 * the numbers in /proc/meminfo look odd.
 *
 * So here, we have a buffer which has just come off the forget list.  Look to
 * see if we can strip all buffers from the backing page.
 *
 * Called under lock_journal(), and possibly under hfsplus_jbd_datalist_lock.  The
 * caller provided us with a ref against the buffer, and we drop that here.
 */
static void release_buffer_page(struct buffer_head *bh)
{
	struct page *page;

	if (buffer_dirty(bh))
		goto nope;
	if (atomic_read(&bh->b_count) != 1)
		goto nope;
	page = bh->b_page;
	if (!page)
		goto nope;
	if (page->mapping)
		goto nope;

	/* OK, it's a truncated page */
	if (TestSetPageLocked(page))
		goto nope;

	page_cache_get(page);
	__brelse(bh);
	try_to_free_buffers(page);
	unlock_page(page);
	page_cache_release(page);
	return;

nope:
	__brelse(bh);
}

/*
 * Try to acquire hfsplus_jbd_lock_bh_state() against the buffer, when j_list_lock is
 * held.  For ranking reasons we must trylock.  If we lose, schedule away and
 * return 0.  j_list_lock is dropped in this case.
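 *
 * Callers follow the pattern used in the data writeout loop below:
 *
 *	if (!inverted_lock(journal, bh))
 *		goto write_out_data;
 *
 * i.e. on failure they restart the whole list walk, since j_list_lock
 * has been dropped and the list may have changed underneath them.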
 */
static int inverted_lock(hfsplus_jbd_t *journal, struct buffer_head *bh)
{
	if (!hfsplus_jbd_trylock_bh_state(bh)) {
		spin_unlock(&journal->j_list_lock);
		schedule();
		return 0;
	}
	return 1;
}

/* Done it all: now write the commit record.  We should have
 * cleaned up our previous buffers by now, so if we are in abort
 * mode we can now just skip the rest of the journal write
 * entirely.
 *
 * Returns 1 if the journal needs to be aborted or 0 on success
 */
static int hfsplus_jbd_write_commit_record(hfsplus_jbd_t *journal,
					hfsplus_transaction_t *commit_transaction)
{
	struct hfsplus_jbd_head *descriptor;
	struct buffer_head *bh;
	int i, ret;
	int barrier_done = 0;

#ifdef HFSPLUS_JOURNAL_MAC_COMPATIBLE
	dprint(DBG_JCOMMIT, "Skip writing commit block into the disk\n");
	return 0;
#endif

	if (is_hfsplus_jbd_aborted(journal))
		return 0;

	descriptor = hfsplus_jbd_get_descriptor_buffer(journal);
	if (!descriptor)
		return 1;

	bh = hfsplus_jh2bh(descriptor);

	/* Write the commit header into each 512-byte sector of the block */
	for (i = 0; i < bh->b_size; i += 512) {
		hfsplus_jbd_header_t *tmp =
				(hfsplus_jbd_header_t *)(bh->b_data + i);
		tmp->h_magic = cpu_to_be32(JFS_MAGIC_NUMBER);
		tmp->h_blocktype = cpu_to_be32(JFS_COMMIT_BLOCK);
		tmp->h_sequence = cpu_to_be32(commit_transaction->t_tid);
	}

	HFSPLUS_JBUFFER_TRACE(descriptor, "write commit block");
	set_buffer_dirty(bh);
	if (journal->j_flags & JFS_BARRIER) {
		set_buffer_ordered(bh);
		barrier_done = 1;
	}
	ret = sync_dirty_buffer(bh);
	/* is it possible for another commit to fail at roughly
	 * the same time as this one?  If so, we don't want to
	 * trust the barrier flag in the super, but instead want
	 * to remember if we sent a barrier request
	 */
	if (ret == -EOPNOTSUPP && barrier_done) {
		char b[BDEVNAME_SIZE];

		printk(KERN_WARNING
			"JBD: barrier-based sync failed on %s - "
			"disabling barriers\n",
			bdevname(journal->j_dev, b));
		spin_lock(&journal->j_state_lock);
		journal->j_flags &= ~JFS_BARRIER;
		spin_unlock(&journal->j_state_lock);

		/* And try again, without the barrier */
		clear_buffer_ordered(bh);
		set_buffer_uptodate(bh);
		set_buffer_dirty(bh);
		ret = sync_dirty_buffer(bh);
	}
	put_bh(bh);		/* One for getblk() */
	hfsplus_jbd_put_journal_head(descriptor);

	return (ret == -EIO);
}

#ifdef HFSPLUS_JOURNAL_MAC_COMPATIBLE
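/*
 * Byte-swap a Mac-format block list header in place.  The loop starts
 * at binfo[1] because binfo[0] is the bookkeeping entry for the header
 * itself (its `next' field chains Mac transactions; see the "Only one
 * Mac transaction" comment in hfsplus_jbd_commit_transaction()).
 * num_blocks is converted last, after the loop that still needs the
 * host-order value.
 */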
static void hfsplus_journaled_swap_blhdr(hfsplus_blhdr_t *blhdr)
{
	int i;

	blhdr->bytes_used = cpu_to_be32(blhdr->bytes_used);
	blhdr->checksum = cpu_to_be32(blhdr->checksum);

	for (i = 1; i < blhdr->num_blocks; i++) {
		blhdr->binfo[i].bnum = cpu_to_be64(blhdr->binfo[i].bnum);
		blhdr->binfo[i].bsize = cpu_to_be32(blhdr->binfo[i].bsize);
	}

	blhdr->num_blocks = cpu_to_be16(blhdr->num_blocks);
}
#endif

/*
 * hfsplus_jbd_commit_transaction
 *
 * The primary function for committing a transaction to the log.  This
 * function is called by the journal thread to begin a complete commit.
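 *
 * The work proceeds in the numbered phases traced by the dprints
 * below: lock the transaction down and recycle reserved and
 * checkpointed buffers (phase 1); flush the transaction's data
 * buffers and write the revoke records (phase 2); write the journaled
 * metadata behind descriptor blocks (phase 3); wait for the metadata
 * and control-buffer IO to complete (phases 4 and 5); write the
 * commit record (phase 6); move committed buffers onto the checkpoint
 * lists (phase 7); and mark the transaction finished (phase 8).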
 */
void hfsplus_jbd_commit_transaction(hfsplus_jbd_t *journal)
{
	hfsplus_transaction_t *commit_transaction;
	struct hfsplus_jbd_head *jh, *new_jh, *descriptor;
	struct buffer_head **wbuf = journal->j_wbuf;
	int bufs;
	int flags;
	int err;
	unsigned long blocknr;
#ifdef HFSPLUS_JOURNAL_MAC_COMPATIBLE
	hfsplus_blhdr_t *blhdr = NULL;
	struct super_block *sb = NULL;
	struct hfsplus_journal *jnl = NULL;
#else
	char *tagp = NULL;
	hfsplus_jbd_header_t *header;
	hfsplus_jbd_block_tag_t *tag = NULL;
	int space_left = 0;
	int first_tag = 0;
	int tag_flag;
#endif
	int i;

	/*
	 * First job: lock down the current transaction and wait for
	 * all outstanding updates to complete.
	 */

#ifdef COMMIT_STATS
	spin_lock(&journal->j_list_lock);
	summarise_hfsplus_jbd_usage(journal);
	spin_unlock(&journal->j_list_lock);
#endif

	/* Do we need to erase the effects of a prior hfsplus_jbd_flush? */
	if (journal->j_flags & JFS_FLUSHED) {
		dprint(DBG_JCOMMIT, "super block updated\n");
		hfsplus_jbd_update_superblock(journal, 1);
	} else {
		dprint(DBG_JCOMMIT, "superblock not updated\n");
	}

	HFSPLUS_J_ASSERT(journal->j_running_transaction != NULL);
	HFSPLUS_J_ASSERT(journal->j_committing_transaction == NULL);

	commit_transaction = journal->j_running_transaction;
	HFSPLUS_J_ASSERT(commit_transaction->t_state == HFSPLUS_T_RUNNING);

	dprint(DBG_JCOMMIT, "JBD: starting commit of transaction %d\n", commit_transaction->t_tid);

	spin_lock(&journal->j_state_lock);
	commit_transaction->t_state = HFSPLUS_T_LOCKED;

	spin_lock(&commit_transaction->t_handle_lock);
	while (commit_transaction->t_updates) {
		DEFINE_WAIT(wait);

		prepare_to_wait(&journal->j_wait_updates, &wait,
					TASK_UNINTERRUPTIBLE);
		if (commit_transaction->t_updates) {
			spin_unlock(&commit_transaction->t_handle_lock);
			spin_unlock(&journal->j_state_lock);
			schedule();
			spin_lock(&journal->j_state_lock);
			spin_lock(&commit_transaction->t_handle_lock);
		}
		finish_wait(&journal->j_wait_updates, &wait);
	}
	spin_unlock(&commit_transaction->t_handle_lock);

	HFSPLUS_J_ASSERT (commit_transaction->t_outstanding_credits <=
			journal->j_max_transaction_buffers);

	/*
	 * First thing we are allowed to do is to discard any remaining
	 * HFSPLUS_BJ_Reserved buffers.  Note, it is _not_ permissible to assume
	 * that there are no such buffers: if a large filesystem
	 * operation like a truncate needs to split itself over multiple
	 * transactions, then it may try to do a hfsplus_jbd_restart() while
	 * there are still HFSPLUS_BJ_Reserved buffers outstanding.  These must
	 * be released cleanly from the current transaction.
	 *
	 * In this case, the filesystem must still reserve write access
	 * again before modifying the buffer in the new transaction, but
	 * we do not require it to remember exactly which old buffers it
	 * has reserved.  This is consistent with the existing behaviour
	 * that multiple hfsplus_jbd_get_write_access() calls to the same
	 * buffer are perfectly permissible.
	 */
	while (commit_transaction->t_reserved_list) {
		jh = commit_transaction->t_reserved_list;
		HFSPLUS_JBUFFER_TRACE(jh, "reserved, unused: refile");
		/*
		 * A hfsplus_jbd_get_undo_access()+hfsplus_jbd_release_buffer() may
		 * leave undo-committed data.
		 */
		if (jh->b_committed_data) {
			struct buffer_head *bh = hfsplus_jh2bh(jh);

			hfsplus_jbd_lock_bh_state(bh);
			kfree(jh->b_committed_data);
			jh->b_committed_data = NULL;
			hfsplus_jbd_unlock_bh_state(bh);
		}
		hfsplus_jbd_refile_buffer(journal, jh);
	}

	/*
	 * Now try to drop any written-back buffers from the journal's
	 * checkpoint lists.  We do this *before* commit because it potentially
	 * frees some memory
	 */
	spin_lock(&journal->j_list_lock);
	__hfsplus_jbd_clean_checkpoint_list(journal);
	spin_unlock(&journal->j_list_lock);

	dprint(DBG_JCOMMIT, "JBD: commit phase 1\n");

	/*
	 * Switch to a new revoke table.
	 */
	hfsplus_jbd_switch_revoke_table(journal);

	commit_transaction->t_state = HFSPLUS_T_FLUSH;
	journal->j_committing_transaction = commit_transaction;
	journal->j_running_transaction = NULL;
	commit_transaction->t_log_start = journal->j_head;
	wake_up(&journal->j_wait_transaction_locked);
	spin_unlock(&journal->j_state_lock);

	dprint(DBG_JCOMMIT, "JBD: commit phase 2\n");

	/*
	 * First, drop modified flag: all accesses to the buffers
	 * will be tracked for a new transaction only -bzzz
	 */
	spin_lock(&journal->j_list_lock);
	if (commit_transaction->t_buffers) {
		new_jh = jh = commit_transaction->t_buffers->b_tnext;
		do {
			HFSPLUS_J_ASSERT_JH(new_jh, new_jh->b_modified == 1 ||
					new_jh->b_modified == 0);
			new_jh->b_modified = 0;
			new_jh = new_jh->b_tnext;
		} while (new_jh != jh);
	}
	spin_unlock(&journal->j_list_lock);

	/*
	 * Now start flushing things to disk, in the order they appear
	 * on the transaction lists.  Data blocks go first.
	 */

	err = 0;
	/*
	 * Whenever we unlock the journal and sleep, things can get added
	 * onto ->t_sync_datalist, so we have to keep looping back to
	 * write_out_data until we *know* that the list is empty.
	 */
	bufs = 0;
	/*
	 * Cleanup any flushed data buffers from the data list.  Even in
	 * abort mode, we want to flush this out as soon as possible.
	 */
write_out_data:
	cond_resched();
	spin_lock(&journal->j_list_lock);

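	/*
	 * Each buffer on t_sync_datalist is handled according to its
	 * state: locked buffers (IO already in flight) move to the
	 * HFSPLUS_BJ_Locked list so we can wait on them below; dirty
	 * buffers are batched into wbuf[] and submitted; clean buffers
	 * have finished their writeout and are simply unfiled.
	 */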
	while (commit_transaction->t_sync_datalist) {
		struct buffer_head *bh;

		jh = commit_transaction->t_sync_datalist;
		commit_transaction->t_sync_datalist = jh->b_tnext;
		bh = hfsplus_jh2bh(jh);
		if (buffer_locked(bh)) {
			HFSPLUS_BUFFER_TRACE(bh, "locked");
			if (!inverted_lock(journal, bh))
				goto write_out_data;
			__hfsplus_jbd_temp_unlink_buffer(jh);
			__hfsplus_jbd_file_buffer(jh, commit_transaction,
						HFSPLUS_BJ_Locked);
			hfsplus_jbd_unlock_bh_state(bh);
			if (lock_need_resched(&journal->j_list_lock)) {
				spin_unlock(&journal->j_list_lock);
				goto write_out_data;
			}
		} else {
			if (buffer_dirty(bh)) {
				HFSPLUS_BUFFER_TRACE(bh, "start journal writeout");
				get_bh(bh);
				wbuf[bufs++] = bh;
				if (bufs == journal->j_wbufsize) {
					dprint(DBG_JCOMMIT, "submit %d writes\n", bufs);
					spin_unlock(&journal->j_list_lock);
					ll_rw_block(SWRITE, bufs, wbuf);
					hfsplus_jbd_brelse_array(wbuf, bufs);
					bufs = 0;
					goto write_out_data;
				}
			} else {
				HFSPLUS_BUFFER_TRACE(bh, "writeout complete: unfile");
				if (!inverted_lock(journal, bh))
					goto write_out_data;
				__hfsplus_jbd_unfile_buffer(jh);
				hfsplus_jbd_unlock_bh_state(bh);
				hfsplus_jbd_remove_journal_head(bh);
				put_bh(bh);
				if (lock_need_resched(&journal->j_list_lock)) {
					spin_unlock(&journal->j_list_lock);
					goto write_out_data;
				}
			}
		}
	}

	if (bufs) {
		spin_unlock(&journal->j_list_lock);
		ll_rw_block(SWRITE, bufs, wbuf);
		hfsplus_jbd_brelse_array(wbuf, bufs);
		spin_lock(&journal->j_list_lock);
	}

	/*
	 * Wait for all previously submitted IO to complete.
	 */
	while (commit_transaction->t_locked_list) {
		struct buffer_head *bh;

		jh = commit_transaction->t_locked_list->b_tprev;
		bh = hfsplus_jh2bh(jh);
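		/* Take an extra reference so the buffer cannot be freed
		   while j_list_lock is dropped to wait on it. */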
		get_bh(bh);
		if (buffer_locked(bh)) {
			spin_unlock(&journal->j_list_lock);
			wait_on_buffer(bh);
			if (unlikely(!buffer_uptodate(bh)))
				err = -EIO;
			spin_lock(&journal->j_list_lock);
		}
		if (!inverted_lock(journal, bh)) {
			put_bh(bh);
			spin_lock(&journal->j_list_lock);
			continue;
		}
		if (buffer_hfsplus_jbd(bh) && jh->b_jlist == HFSPLUS_BJ_Locked) {
			__hfsplus_jbd_unfile_buffer(jh);
			hfsplus_jbd_unlock_bh_state(bh);
			hfsplus_jbd_remove_journal_head(bh);
			put_bh(bh);
		} else {
			hfsplus_jbd_unlock_bh_state(bh);
		}
		put_bh(bh);
		cond_resched_lock(&journal->j_list_lock);
	}
	spin_unlock(&journal->j_list_lock);

	if (err)
		__hfsplus_jbd_abort_hard(journal);

	hfsplus_jbd_write_revoke_records(journal, commit_transaction);

	dprint(DBG_JCOMMIT, "JBD: commit phase 2\n");

	/*
	 * If we found any dirty or locked buffers, then we should have
	 * looped back up to the write_out_data label.  If there weren't
	 * any then hfsplus_jbd_clean_data_list should have wiped the list
	 * clean by now, so check that it is in fact empty.
	 */
	HFSPLUS_J_ASSERT (commit_transaction->t_sync_datalist == NULL);

	dprint(DBG_JCOMMIT, "JBD: commit phase 3\n");

	/*
	 * Way to go: we have now written out all of the data for a
	 * transaction!  Now comes the tricky part: we need to write out
	 * metadata.  Loop over the transaction's entire buffer list:
	 */
	commit_transaction->t_state = HFSPLUS_T_COMMIT;

	descriptor = NULL;
#ifdef HFSPLUS_JOURNAL_MAC_COMPATIBLE
	sb = (struct super_block *)journal->j_private;
	jnl = &(HFSPLUS_SB(sb).jnl);
#endif
	bufs = 0;
	while (commit_transaction->t_buffers) {

		/* Find the next buffer to be journaled... */

		jh = commit_transaction->t_buffers;

		/* If we're in abort mode, we just un-journal the buffer and
		   release it for background writing. */

		if (is_hfsplus_jbd_aborted(journal)) {
			HFSPLUS_JBUFFER_TRACE(jh, "journal is aborting: refile");
			hfsplus_jbd_refile_buffer(journal, jh);
			/* If that was the last one, we need to clean up
			 * any descriptor buffers which may have been
			 * already allocated, even if we are now
			 * aborting. */
			if (!commit_transaction->t_buffers)
				goto start_hfsplus_jbd_io;
			continue;
		}

		/* Make sure we have a descriptor block in which to
		   record the metadata buffer. */

		if (!descriptor) {
			struct buffer_head *bh;

			HFSPLUS_J_ASSERT (bufs == 0);

			dprint(DBG_JCOMMIT, "JBD: get descriptor\n");

			descriptor = hfsplus_jbd_get_descriptor_buffer(journal);
			if (!descriptor) {
				__hfsplus_jbd_abort_hard(journal);
				continue;
			}

			bh = hfsplus_jh2bh(descriptor);
			dprint(DBG_JCOMMIT, "JBD: got buffer %llu (%p)\n", (unsigned long long)bh->b_blocknr, bh->b_data);
#ifdef HFSPLUS_JOURNAL_MAC_COMPATIBLE
			/* Populate block list header */
			blhdr = (hfsplus_blhdr_t *)bh->b_data;
			blhdr->max_blocks = (jnl->jhdr->blhdr_size / sizeof(struct hfsplus_block_info)) - 1;
			blhdr->num_blocks = 1; /* One is for header */
			blhdr->bytes_used = jnl->jhdr->blhdr_size;
			blhdr->binfo[0].next = 0; /* Only one Mac transaction */
			hfsplus_journal_header_end_update(journal, jnl->jhdr);
#else
			header = (hfsplus_jbd_header_t *)&bh->b_data[0];
			header->h_magic     = cpu_to_be32(JFS_MAGIC_NUMBER);
			header->h_blocktype = cpu_to_be32(JFS_DESCRIPTOR_BLOCK);
			header->h_sequence  = cpu_to_be32(commit_transaction->t_tid);

			tagp = &bh->b_data[sizeof(hfsplus_jbd_header_t)];
			space_left = bh->b_size - sizeof(hfsplus_jbd_header_t);
			first_tag = 1;
#endif
			set_buffer_hfsplus_jbd_jwrite(bh);
			set_buffer_dirty(bh);
			wbuf[bufs++] = bh;

			/* Record it so that we can wait for IO
                           completion later */
			HFSPLUS_BUFFER_TRACE(bh, "ph3: file as descriptor");
			hfsplus_jbd_file_buffer(descriptor, commit_transaction,
					HFSPLUS_BJ_LogCtl);
		}

		/* Where is the buffer to be written? */

		err = hfsplus_jbd_next_log_block(journal, &blocknr);
		/* If the block mapping failed, just abandon the buffer
		   and repeat this loop: we'll fall into the
		   refile-on-abort condition above. */
		if (err) {
			__hfsplus_jbd_abort_hard(journal);
			continue;
		}

		/*
		 * start_this_handle() uses t_outstanding_credits to determine
		 * the free space in the log, but this counter is changed
		 * by hfsplus_jbd_next_log_block() also.
		 */
		commit_transaction->t_outstanding_credits--;

		/* Bump b_count to prevent truncate from stumbling over
                   the shadowed buffer!  @@@ This can go if we ever get
                   rid of the HFSPLUS_BJ_IO/HFSPLUS_BJ_Shadow pairing of buffers. */
		atomic_inc(&hfsplus_jh2bh(jh)->b_count);

		/* Make a temporary IO buffer with which to write it out
                   (this will requeue both the metadata buffer and the
                   temporary IO buffer). new_bh goes on HFSPLUS_BJ_IO */

		set_bit(BH_HFSPLUS_JWrite, &hfsplus_jh2bh(jh)->b_state);
		/*
		 * akpm: hfsplus_jbd_write_metadata_buffer() sets
		 * new_bh->b_transaction to commit_transaction.
		 * We need to clean this up before we release new_bh
		 * (which is of type HFSPLUS_BJ_IO)
		 */
		HFSPLUS_JBUFFER_TRACE(jh, "ph3: write metadata");
		flags = hfsplus_jbd_write_metadata_buffer(commit_transaction,
						      jh, &new_jh, blocknr);
		set_bit(BH_HFSPLUS_JWrite, &hfsplus_jh2bh(new_jh)->b_state);
#ifdef HFSPLUS_JOURNAL_MAC_COMPATIBLE
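		/* binfo[].bnum is in journal sector units, so convert the
		   buffer's fs block number via HFSPLUS_SECTOR_SHIFT. */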
		blhdr->binfo[bufs].bnum = (hfsplus_jh2bh(jh)->b_blocknr * sb->s_blocksize) >> HFSPLUS_SECTOR_SHIFT;
		blhdr->binfo[bufs].bsize = hfsplus_jh2bh(jh)->b_size;
		blhdr->binfo[bufs].next = 0;
		blhdr->bytes_used += blhdr->binfo[bufs].bsize;
		blhdr->num_blocks++;
		hfsplus_journal_header_end_update(journal, jnl->jhdr);
#endif
		wbuf[bufs++] = hfsplus_jh2bh(new_jh);

#ifndef HFSPLUS_JOURNAL_MAC_COMPATIBLE
		/* Record the new block's tag in the current descriptor
                   buffer */
		tag_flag = 0;
		if (flags & 1)
			tag_flag |= JFS_FLAG_ESCAPE;
		if (!first_tag)
			tag_flag |= JFS_FLAG_SAME_UUID;

		tag = (hfsplus_jbd_block_tag_t *) tagp;
		tag->t_blocknr = cpu_to_be32(hfsplus_jh2bh(jh)->b_blocknr);
		tag->t_flags = cpu_to_be32(tag_flag);
		tagp += sizeof(hfsplus_jbd_block_tag_t);
		space_left -= sizeof(hfsplus_jbd_block_tag_t);

		if (first_tag) {
			memcpy (tagp, journal->j_uuid, 16);
			tagp += 16;
			space_left -= 16;
			first_tag = 0;
		}
#endif

		/* If there's no more to do, or if the descriptor is full,
		   let the IO rip! */

#ifdef HFSPLUS_JOURNAL_MAC_COMPATIBLE
		if (bufs == journal->j_wbufsize ||
				commit_transaction->t_buffers == NULL ||
				bufs == blhdr->max_blocks)
#else
		if (bufs == journal->j_wbufsize ||
		    commit_transaction->t_buffers == NULL ||
		    space_left < sizeof(hfsplus_jbd_block_tag_t) + 16)
#endif
		{

#ifdef HFSPLUS_JOURNAL_MAC_COMPATIBLE
			dprint(DBG_JCOMMIT, "start: %llx, end: %llx, num_blocks: %#x, bytes_used: %#x, j_head: %#lx, j_first: %#lx\n", jnl->jhdr->start, jnl->jhdr->end, blhdr->num_blocks, blhdr->bytes_used, journal->j_head, journal->j_first);
			blhdr->max_blocks = HFSPLUS_JBD_MAGIC_NUMBER;
			if (jnl->flags == HFSPLUS_JOURNAL_SWAP)
				hfsplus_journaled_swap_blhdr(blhdr);
#endif

			dprint(DBG_JCOMMIT, "JBD: Submit %d IOs\n", bufs);

			/* Write an end-of-descriptor marker before
                           submitting the IOs.  "tag" still points to
                           the last tag we set up. */

#ifndef HFSPLUS_JOURNAL_MAC_COMPATIBLE
			tag->t_flags |= cpu_to_be32(JFS_FLAG_LAST_TAG);
#endif

start_hfsplus_jbd_io:
			for (i = 0; i < bufs; i++) {
				struct buffer_head *bh = wbuf[i];
				lock_buffer(bh);
				clear_buffer_dirty(bh);
				set_buffer_uptodate(bh);
				bh->b_end_io = hfsplus_jbd_end_buffer_io_sync;
				submit_bh(WRITE, bh);
			}
#ifdef HFSPLUS_JOURNAL_MAC_COMPATIBLE
			//hfsplus_test_block_list_header(__FUNCTION__, jnl->jhdr, jnl);
#endif
			cond_resched();

			/* Force a new descriptor to be generated next
                           time round the loop. */
			descriptor = NULL;
			bufs = 0;
		}
	}

	/* Lo and behold: we have just managed to send a transaction to
           the log.  Before we can commit it, wait for the IO so far to
           complete.  Control buffers being written are on the
           transaction's t_log_list queue, and metadata buffers are on
           the t_iobuf_list queue.

	   Wait for the buffers in reverse order.  That way we are
	   less likely to be woken up until all IOs have completed, and
	   so we incur less scheduling load.
	*/

	dprint(DBG_JCOMMIT, "JBD: commit phase 4\n");

	/*
	 * akpm: these are HFSPLUS_BJ_IO, and j_list_lock is not needed.
	 * See __hfsplus_jbd_try_to_free_buffer.
	 */
wait_for_iobuf:
	while (commit_transaction->t_iobuf_list != NULL) {
		struct buffer_head *bh;

		jh = commit_transaction->t_iobuf_list->b_tprev;
		bh = hfsplus_jh2bh(jh);
		if (buffer_locked(bh)) {
			wait_on_buffer(bh);
			goto wait_for_iobuf;
		}
		if (cond_resched())
			goto wait_for_iobuf;

		if (unlikely(!buffer_uptodate(bh)))
			err = -EIO;

		clear_buffer_hfsplus_jbd_jwrite(bh);

		HFSPLUS_JBUFFER_TRACE(jh, "ph4: unfile after journal write");
		hfsplus_jbd_unfile_buffer(journal, jh);

		/*
		 * ->t_iobuf_list should contain only dummy buffer_heads
		 * which were created by hfsplus_jbd_write_metadata_buffer().
		 */
		HFSPLUS_BUFFER_TRACE(bh, "dumping temporary bh");
		hfsplus_jbd_put_journal_head(jh);
		__brelse(bh);
		HFSPLUS_J_ASSERT_BH(bh, atomic_read(&bh->b_count) == 0);
		free_buffer_head(bh);

		/* We also have to unlock and free the corresponding
                   shadowed buffer */
		jh = commit_transaction->t_shadow_list->b_tprev;
		bh = hfsplus_jh2bh(jh);
		clear_bit(BH_HFSPLUS_JWrite, &bh->b_state);
		HFSPLUS_J_ASSERT_BH(bh, buffer_hfsplus_jbddirty(bh));

		/* The metadata is now released for reuse, but we need
                   to remember it against this transaction so that when
                   we finally commit, we can do any checkpointing
                   required. */
		HFSPLUS_JBUFFER_TRACE(jh, "file as HFSPLUS_BJ_Forget");
		hfsplus_jbd_file_buffer(jh, commit_transaction, HFSPLUS_BJ_Forget);
		/* Wake up any transactions which were waiting for this
		   IO to complete */
		wake_up_bit(&bh->b_state, BH_HFSPLUS_Unshadow);
		HFSPLUS_JBUFFER_TRACE(jh, "brelse shadowed buffer");
		__brelse(bh);
	}

	HFSPLUS_J_ASSERT (commit_transaction->t_shadow_list == NULL);

	dprint(DBG_JCOMMIT, "JBD: commit phase 5\n");

	/* Here we wait for the revoke record and descriptor record buffers */
wait_for_ctlbuf:
	while (commit_transaction->t_log_list != NULL) {
		struct buffer_head *bh;

		jh = commit_transaction->t_log_list->b_tprev;
		bh = hfsplus_jh2bh(jh);
		if (buffer_locked(bh)) {
			wait_on_buffer(bh);
			goto wait_for_ctlbuf;
		}
		if (cond_resched())
			goto wait_for_ctlbuf;

		if (unlikely(!buffer_uptodate(bh)))
			err = -EIO;

		HFSPLUS_BUFFER_TRACE(bh, "ph5: control buffer writeout done: unfile");
		clear_buffer_hfsplus_jbd_jwrite(bh);
		hfsplus_jbd_unfile_buffer(journal, jh);
		hfsplus_jbd_put_journal_head(jh);
		__brelse(bh);		/* One for getblk */
		/* AKPM: bforget here */
	}

	dprint(DBG_JCOMMIT, "JBD: commit phase 6\n");

	if (hfsplus_jbd_write_commit_record(journal, commit_transaction))
		err = -EIO;

	if (err)
		__hfsplus_jbd_abort_hard(journal);

	/* End of a transaction!  Finally, we can do checkpoint
           processing: any buffers committed as a result of this
           transaction can be removed from any checkpoint lists they
           were on before. */

	dprint(DBG_JCOMMIT, "JBD: commit phase 7\n");

	HFSPLUS_J_ASSERT(commit_transaction->t_sync_datalist == NULL);
	HFSPLUS_J_ASSERT(commit_transaction->t_buffers == NULL);
	HFSPLUS_J_ASSERT(commit_transaction->t_checkpoint_list == NULL);
	HFSPLUS_J_ASSERT(commit_transaction->t_iobuf_list == NULL);
	HFSPLUS_J_ASSERT(commit_transaction->t_shadow_list == NULL);
	HFSPLUS_J_ASSERT(commit_transaction->t_log_list == NULL);

restart_loop:
	/*
	 * As there are other places (hfsplus_jbd_unmap_buffer()) adding buffers
	 * to this list we have to be careful and hold the j_list_lock.
	 */
	spin_lock(&journal->j_list_lock);
	while (commit_transaction->t_forget) {
		hfsplus_transaction_t *cp_transaction;
		struct buffer_head *bh;

		jh = commit_transaction->t_forget;
		spin_unlock(&journal->j_list_lock);
		bh = hfsplus_jh2bh(jh);
		hfsplus_jbd_lock_bh_state(bh);
		HFSPLUS_J_ASSERT_JH(jh, jh->b_transaction == commit_transaction ||
			jh->b_transaction == journal->j_running_transaction);

		/*
		 * If there is undo-protected committed data against
		 * this buffer, then we can remove it now.  If it is a
		 * buffer needing such protection, the old frozen_data
		 * field now points to a committed version of the
		 * buffer, so rotate that field to the new committed
		 * data.
		 *
		 * Otherwise, we can just throw away the frozen data now.
		 */
		if (jh->b_committed_data) {
			kfree(jh->b_committed_data);
			jh->b_committed_data = NULL;
			if (jh->b_frozen_data) {
				jh->b_committed_data = jh->b_frozen_data;
				jh->b_frozen_data = NULL;
			}
		} else if (jh->b_frozen_data) {
			kfree(jh->b_frozen_data);
			jh->b_frozen_data = NULL;
		}

		spin_lock(&journal->j_list_lock);
		cp_transaction = jh->b_cp_transaction;
		if (cp_transaction) {
			HFSPLUS_JBUFFER_TRACE(jh, "remove from old cp transaction");
			__hfsplus_jbd_remove_checkpoint(jh);
		}

		/* Only re-checkpoint the buffer_head if it is marked
		 * dirty.  If the buffer was added to the HFSPLUS_BJ_Forget list
		 * by hfsplus_jbd_forget, it may no longer be dirty and
		 * there's no point in keeping a checkpoint record for
		 * it. */

		/* A buffer which has been freed while still being
		 * journaled by a previous transaction may end up still
		 * being dirty here, but we want to avoid writing back
		 * that buffer in the future now that the last use has
		 * been committed.  That's not only a performance gain,
		 * it also stops aliasing problems if the buffer is left
		 * behind for writeback and gets reallocated for another
		 * use in a different page. */
		if (buffer_hfsplus_jbd_freed(bh)) {
			clear_buffer_hfsplus_jbd_freed(bh);
			clear_buffer_hfsplus_jbddirty(bh);
		}

		if (buffer_hfsplus_jbddirty(bh)) {
			HFSPLUS_JBUFFER_TRACE(jh, "add to new checkpointing trans");
			__hfsplus_jbd_insert_checkpoint(jh, commit_transaction);
			HFSPLUS_JBUFFER_TRACE(jh, "refile for checkpoint writeback");
			__hfsplus_jbd_refile_buffer(jh);
			hfsplus_jbd_unlock_bh_state(bh);
		} else {
			HFSPLUS_J_ASSERT_BH(bh, !buffer_dirty(bh));
			HFSPLUS_J_ASSERT_JH(jh, jh->b_next_transaction == NULL);
			__hfsplus_jbd_unfile_buffer(jh);
			hfsplus_jbd_unlock_bh_state(bh);
			hfsplus_jbd_remove_journal_head(bh);  /* needs a brelse */
			release_buffer_page(bh);
		}
		cond_resched_lock(&journal->j_list_lock);
	}
	spin_unlock(&journal->j_list_lock);
	/*
	 * This is a bit sleazy.  We borrow j_list_lock to protect
	 * journal->j_committing_transaction in __hfsplus_jbd_remove_checkpoint.
	 * Really, __hfsplus_jbd_remove_checkpoint should be using j_state_lock, but
	 * it's a bit of a hassle to hold that across __hfsplus_jbd_remove_checkpoint.
	 */
	spin_lock(&journal->j_state_lock);
	spin_lock(&journal->j_list_lock);
	/*
	 * Now recheck if some buffers did not get attached to the transaction
	 * while the lock was dropped...
	 */
	if (commit_transaction->t_forget) {
		spin_unlock(&journal->j_list_lock);
		spin_unlock(&journal->j_state_lock);
		goto restart_loop;
	}

	/* Done with this transaction! */

	dprint(DBG_JCOMMIT, "JBD: commit phase 8\n");

	HFSPLUS_J_ASSERT(commit_transaction->t_state == HFSPLUS_T_COMMIT);

	commit_transaction->t_state = HFSPLUS_T_FINISHED;
	HFSPLUS_J_ASSERT(commit_transaction == journal->j_committing_transaction);
	journal->j_commit_sequence = commit_transaction->t_tid;
	journal->j_committing_transaction = NULL;
	spin_unlock(&journal->j_state_lock);

	if (commit_transaction->t_checkpoint_list == NULL) {
		__hfsplus_jbd_drop_transaction(journal, commit_transaction);
	} else {
		if (journal->j_checkpoint_transactions == NULL) {
			journal->j_checkpoint_transactions = commit_transaction;
			commit_transaction->t_cpnext = commit_transaction;
			commit_transaction->t_cpprev = commit_transaction;
		} else {
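			/* Splice the transaction in at the tail of the
			   circular checkpoint list. */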
			commit_transaction->t_cpnext =
				journal->j_checkpoint_transactions;
			commit_transaction->t_cpprev =
				commit_transaction->t_cpnext->t_cpprev;
			commit_transaction->t_cpnext->t_cpprev =
				commit_transaction;
			commit_transaction->t_cpprev->t_cpnext =
				commit_transaction;
		}
	}
	spin_unlock(&journal->j_list_lock);

	dprint(DBG_JCOMMIT, "JBD: commit %d complete, head %d\n", journal->j_commit_sequence, journal->j_tail_sequence);

	wake_up(&journal->j_wait_done_commit);
}
