1// SPDX-License-Identifier: GPL-2.0-or-later
2/*
3 *   Copyright (C) International Business Machines Corp., 2000-2004
4 *   Portions Copyright (C) Christoph Hellwig, 2001-2002
5 */
6
7/*
8 *	jfs_logmgr.c: log manager
9 *
10 * for related information, see transaction manager (jfs_txnmgr.c), and
11 * recovery manager (jfs_logredo.c).
12 *
13 * note: for detail, RTFS.
14 *
15 *	log buffer manager:
16 * special purpose buffer manager supporting log i/o requirements.
17 * per log serial pageout of logpage
18 * queuing i/o requests and redrive i/o at iodone
19 * maintain current logpage buffer
20 * no caching since append only
21 * appropriate jfs buffer cache buffers as needed
22 *
23 *	group commit:
24 * transactions which wrote COMMIT records in the same in-memory
25 * log page during the pageout of previous/current log page(s) are
26 * committed together by the pageout of the page.
27 *
28 *	TBD lazy commit:
 * transactions are committed asynchronously: the log page
 * containing a transaction's COMMIT record is paged out once
 * that page becomes full;
31 *
32 *	serialization:
33 * . a per log lock serialize log write.
34 * . a per log lock serialize group commit.
35 * . a per log lock serialize log open/close;
36 *
37 *	TBD log integrity:
38 * careful-write (ping-pong) of last logpage to recover from crash
39 * in overwrite.
 * detection of split (out-of-order) write of physical sectors
 * of last logpage via timestamp at end of each sector
 * (with its mirror data array at trailer).
43 *
44 *	alternatives:
45 * lsn - 64-bit monotonically increasing integer vs
46 * 32-bit lspn and page eor.
47 */
48
49#include <linux/fs.h>
50#include <linux/blkdev.h>
51#include <linux/interrupt.h>
52#include <linux/completion.h>
53#include <linux/kthread.h>
54#include <linux/buffer_head.h>		/* for sync_blockdev() */
55#include <linux/bio.h>
56#include <linux/freezer.h>
57#include <linux/export.h>
58#include <linux/delay.h>
59#include <linux/mutex.h>
60#include <linux/seq_file.h>
61#include <linux/slab.h>
62#include "jfs_incore.h"
63#include "jfs_filsys.h"
64#include "jfs_metapage.h"
65#include "jfs_superblock.h"
66#include "jfs_txnmgr.h"
67#include "jfs_debug.h"
68
69
/*
 * lbuf's ready to be redriven.  Protected by log_redrive_lock (jfsIO thread)
 *
 * log_redrive_list is the head pointer of that list; log_redrive_lock is
 * the spinlock that must be held while the list is touched.  The code that
 * queues lbufs here and the code that drains the list are not in this part
 * of the file.
 */
static struct lbuf *log_redrive_list;
static DEFINE_SPINLOCK(log_redrive_lock);
75
76
/*
 *	log read/write serialization (per log)
 *
 * loglock is a mutex, so LOG_LOCK() may sleep and must only be used where
 * sleeping is allowed.
 */
#define LOG_LOCK_INIT(log)	mutex_init(&(log)->loglock)
#define LOG_LOCK(log)		mutex_lock(&((log)->loglock))
#define LOG_UNLOCK(log)		mutex_unlock(&((log)->loglock))
83
84
/*
 *	log group commit serialization (per log)
 *
 * gclock is a spinlock.  LOGGC_LOCK()/LOGGC_UNLOCK() use the _irq
 * variants: interrupts are disabled on lock and unconditionally enabled
 * again on unlock, so these are for callers running with interrupts
 * enabled.  lmPostGC() takes gclock with spin_lock_irqsave() directly
 * instead of using these macros.
 *
 * LOGGC_WAKEUP() wakes every waiter on the tblock's gcwait queue.
 */

#define LOGGC_LOCK_INIT(log)	spin_lock_init(&(log)->gclock)
#define LOGGC_LOCK(log)		spin_lock_irq(&(log)->gclock)
#define LOGGC_UNLOCK(log)	spin_unlock_irq(&(log)->gclock)
#define LOGGC_WAKEUP(tblk)	wake_up_all(&(tblk)->gcwait)
93
/*
 *	log sync serialization (per log)
 *
 * LOGSYNC_DELTA(logsize):   the smaller of logsize/8 and 128 log pages;
 *	lmLogSync() uses it as the upper bound on how much more may be
 *	written before the next sync point is triggered.
 * LOGSYNC_BARRIER(logsize): logsize/4; once more than this has been
 *	written since the last sync point, lmLogSync() raises
 *	log_SYNCBARRIER.
 */
#define	LOGSYNC_DELTA(logsize)		min((logsize)/8, 128*LOGPSIZE)
#define	LOGSYNC_BARRIER(logsize)	((logsize)/4)
/*
 * alternative tuning, disabled:
 *
#define	LOGSYNC_DELTA(logsize)		min((logsize)/4, 256*LOGPSIZE)
#define	LOGSYNC_BARRIER(logsize)	((logsize)/2)
*/
103
104
/*
 *	log buffer cache synchronization
 *
 * One global spinlock (not per log) guards the log buffer cache.
 */
static DEFINE_SPINLOCK(jfsLCacheLock);

/* take/release jfsLCacheLock, saving/restoring the interrupt state in flags */
#define	LCACHE_LOCK(flags)	spin_lock_irqsave(&jfsLCacheLock, flags)
#define	LCACHE_UNLOCK(flags)	spin_unlock_irqrestore(&jfsLCacheLock, flags)

/*
 * See __SLEEP_COND in jfs_locks.h
 *
 * Does nothing when cond already holds; otherwise hands over to
 * __SLEEP_COND with LCACHE_LOCK/LCACHE_UNLOCK as the lock and unlock
 * commands.
 */
#define LCACHE_SLEEP_COND(wq, cond, flags)	\
do {						\
	if (cond)				\
		break;				\
	__SLEEP_COND(wq, cond, LCACHE_LOCK(flags), LCACHE_UNLOCK(flags)); \
} while (0)

/* wake waiters on the given wait queue */
#define	LCACHE_WAKEUP(event)	wake_up(event)
124
125
/*
 *	lbuf buffer cache (lCache) control
 *
 * Bit flags; callers OR them together when calling lbmWrite() (for
 * example lbmWRITE | lbmRELEASE | lbmFREE).  The meanings of lbmREAD,
 * lbmDONE and lbmDIRECT are not spelled out in this part of the file --
 * NOTE(review): confirm against lbmRead()/lbmIODone()/lbmDirectWrite().
 */
/* log buffer manager pageout control (cumulative, inclusive) */
#define	lbmREAD		0x0001
#define	lbmWRITE	0x0002	/* enqueue at tail of write queue;
				 * init pageout if at head of queue;
				 */
#define	lbmRELEASE	0x0004	/* remove from write queue
				 * at completion of pageout;
				 * do not free/recycle it yet:
				 * caller will free it;
				 */
#define	lbmSYNC		0x0008	/* do not return to freelist
				 * when removed from write queue;
				 */
#define lbmFREE		0x0010	/* return to freelist
				 * at completion of pageout;
				 * the buffer may be recycled;
				 */
#define	lbmDONE		0x0020
#define	lbmERROR	0x0040	/* lmPostGC() turns this into tblkGC_ERROR
				 * on the transactions of the page
				 */
#define lbmGC		0x0080	/* lbmIODone to perform post-GC processing
				 * of log page
				 */
#define lbmDIRECT	0x0100
152
/*
 * Global list of active external journals
 *
 * jfs_external_logs: open external logs, linked through
 *	jfs_log.journal_list (see lmLogOpen()).
 * dummy_log: the one log shared by no-integrity mounts; allocated on
 *	first use by open_dummy_log().
 * jfs_log_mutex: held by lmLogOpen() and open_dummy_log() while they
 *	look up or modify either of the above.
 */
static LIST_HEAD(jfs_external_logs);
static struct jfs_log *dummy_log;
static DEFINE_MUTEX(jfs_log_mutex);
159
/*
 * forward references
 *
 * lm* routines belong to the log manager proper, lbm* routines to the
 * log buffer manager.
 */
static int lmWriteRecord(struct jfs_log * log, struct tblock * tblk,
			 struct lrd * lrd, struct tlock * tlck);

static int lmNextPage(struct jfs_log * log);
static int lmLogFileSystem(struct jfs_log * log, struct jfs_sb_info *sbi,
			   int activate);

static int open_inline_log(struct super_block *sb);
static int open_dummy_log(struct super_block *sb);
static int lbmLogInit(struct jfs_log * log);
static void lbmLogShutdown(struct jfs_log * log);
static struct lbuf *lbmAllocate(struct jfs_log * log, int);
static void lbmFree(struct lbuf * bp);
static void lbmfree(struct lbuf * bp);
static int lbmRead(struct jfs_log * log, int pn, struct lbuf ** bpp);
static void lbmWrite(struct jfs_log * log, struct lbuf * bp, int flag, int cant_block);
static void lbmDirectWrite(struct jfs_log * log, struct lbuf * bp, int flag);
static int lbmIOWait(struct lbuf * bp, int flag);
static bio_end_io_t lbmIODone;
static void lbmStartIO(struct lbuf * bp);
/*
 * NOTE(review): the definition of lmGCwrite() names its second parameter
 * cant_write; it is the value passed on as lbmWrite()'s cant_block.
 */
static void lmGCwrite(struct jfs_log * log, int cant_block);
static int lmLogSync(struct jfs_log * log, int hard_sync);
185
186
187
/*
 *	statistics
 *
 * Only built with CONFIG_JFS_STATISTICS.  In this part of the file the
 * counters are bumped through INCREMENT(): commit in lmWriteRecord(),
 * full_page and partial_page in lmGCwrite().
 */
#ifdef CONFIG_JFS_STATISTICS
static struct lmStat {
	uint commit;		/* # of commit */
	uint pagedone;		/* # of page written */
	uint submitted;		/* # of pages submitted */
	uint full_page;		/* # of full pages submitted */
	uint partial_page;	/* # of partial pages submitted */
} lmStat;
#endif
200
201static void write_special_inodes(struct jfs_log *log,
202				 int (*writer)(struct address_space *))
203{
204	struct jfs_sb_info *sbi;
205
206	list_for_each_entry(sbi, &log->sb_list, log_list) {
207		writer(sbi->ipbmap->i_mapping);
208		writer(sbi->ipimap->i_mapping);
209		writer(sbi->direct_inode->i_mapping);
210	}
211}
212
213/*
214 * NAME:	lmLog()
215 *
216 * FUNCTION:	write a log record;
217 *
218 * PARAMETER:
219 *
220 * RETURN:	lsn - offset to the next log record to write (end-of-log);
221 *		-1  - error;
222 *
223 * note: todo: log error handler
224 */
225int lmLog(struct jfs_log * log, struct tblock * tblk, struct lrd * lrd,
226	  struct tlock * tlck)
227{
228	int lsn;
229	int diffp, difft;
230	struct metapage *mp = NULL;
231	unsigned long flags;
232
233	jfs_info("lmLog: log:0x%p tblk:0x%p, lrd:0x%p tlck:0x%p",
234		 log, tblk, lrd, tlck);
235
236	LOG_LOCK(log);
237
238	/* log by (out-of-transaction) JFS ? */
239	if (tblk == NULL)
240		goto writeRecord;
241
242	/* log from page ? */
243	if (tlck == NULL ||
244	    tlck->type & tlckBTROOT || (mp = tlck->mp) == NULL)
245		goto writeRecord;
246
247	/*
248	 *	initialize/update page/transaction recovery lsn
249	 */
250	lsn = log->lsn;
251
252	LOGSYNC_LOCK(log, flags);
253
254	/*
255	 * initialize page lsn if first log write of the page
256	 */
257	if (mp->lsn == 0) {
258		mp->log = log;
259		mp->lsn = lsn;
260		log->count++;
261
262		/* insert page at tail of logsynclist */
263		list_add_tail(&mp->synclist, &log->synclist);
264	}
265
266	/*
267	 *	initialize/update lsn of tblock of the page
268	 *
269	 * transaction inherits oldest lsn of pages associated
270	 * with allocation/deallocation of resources (their
271	 * log records are used to reconstruct allocation map
272	 * at recovery time: inode for inode allocation map,
273	 * B+-tree index of extent descriptors for block
274	 * allocation map);
275	 * allocation map pages inherit transaction lsn at
276	 * commit time to allow forwarding log syncpt past log
277	 * records associated with allocation/deallocation of
278	 * resources only after persistent map of these map pages
279	 * have been updated and propagated to home.
280	 */
281	/*
282	 * initialize transaction lsn:
283	 */
284	if (tblk->lsn == 0) {
285		/* inherit lsn of its first page logged */
286		tblk->lsn = mp->lsn;
287		log->count++;
288
289		/* insert tblock after the page on logsynclist */
290		list_add(&tblk->synclist, &mp->synclist);
291	}
292	/*
293	 * update transaction lsn:
294	 */
295	else {
296		/* inherit oldest/smallest lsn of page */
297		logdiff(diffp, mp->lsn, log);
298		logdiff(difft, tblk->lsn, log);
299		if (diffp < difft) {
300			/* update tblock lsn with page lsn */
301			tblk->lsn = mp->lsn;
302
303			/* move tblock after page on logsynclist */
304			list_move(&tblk->synclist, &mp->synclist);
305		}
306	}
307
308	LOGSYNC_UNLOCK(log, flags);
309
310	/*
311	 *	write the log record
312	 */
313      writeRecord:
314	lsn = lmWriteRecord(log, tblk, lrd, tlck);
315
316	/*
317	 * forward log syncpt if log reached next syncpt trigger
318	 */
319	logdiff(diffp, lsn, log);
320	if (diffp >= log->nextsync)
321		lsn = lmLogSync(log, 0);
322
323	/* update end-of-log lsn */
324	log->lsn = lsn;
325
326	LOG_UNLOCK(log);
327
328	/* return end-of-log address */
329	return lsn;
330}
331
/*
 * NAME:	lmWriteRecord()
 *
 * FUNCTION:	move the log record to current log page
 *
 *	The data described by tlck (if any) is copied first, each log
 *	vector followed by its 4-byte descriptor; the log record
 *	descriptor (lrd) is appended last.  Whenever the current page
 *	fills up, lmNextPage() is called and copying continues on the new
 *	page, so both data and descriptor may span pages.
 *
 * PARAMETER:	log	- log being written
 *		tblk	- transaction block; only dereferenced when lrd is
 *			  a LOG_COMMIT record
 *		lrd	- log record descriptor; lrd->length is filled in
 *			  here before the descriptor is copied
 *		tlck	- transaction lock describing the data to log,
 *			  or NULL when the record carries no data
 *
 * RETURN:	end-of-log address;
 *		0 if tlck is neither a page lock nor an inode lock
 *
 * serialization: LOG_LOCK() held on entry/exit
 */
static int
lmWriteRecord(struct jfs_log * log, struct tblock * tblk, struct lrd * lrd,
	      struct tlock * tlck)
{
	int lsn = 0;		/* end-of-log address */
	struct lbuf *bp;	/* dst log page buffer */
	struct logpage *lp;	/* dst log page */
	caddr_t dst;		/* destination address in log page */
	int dstoffset;		/* end-of-log offset in log page */
	int freespace;		/* free space in log page */
	caddr_t p;		/* src meta-data page */
	caddr_t src;
	int srclen;
	int nbytes;		/* number of bytes to move */
	int i;
	int len;		/* bytes of data + descriptors preceding lrd */
	struct linelock *linelock;
	struct lv *lv;
	struct lvd *lvd;
	int l2linesize;

	len = 0;

	/* retrieve destination log page to write */
	bp = (struct lbuf *) log->bp;
	lp = (struct logpage *) bp->l_ldata;
	dstoffset = log->eor;

	/* any log data to write ? */
	if (tlck == NULL)
		goto moveLrd;

	/*
	 *	move log record data
	 */
	/* retrieve source meta-data page to log */
	if (tlck->flag & tlckPAGELOCK) {
		p = (caddr_t) (tlck->mp->data);
		linelock = (struct linelock *) & tlck->lock;
	}
	/* retrieve source in-memory inode to log */
	else if (tlck->flag & tlckINODELOCK) {
		if (tlck->type & tlckDTREE)
			p = (caddr_t) &JFS_IP(tlck->ip)->i_dtroot;
		else
			p = (caddr_t) &JFS_IP(tlck->ip)->i_xtroot;
		linelock = (struct linelock *) & tlck->lock;
	}
	else {
		jfs_err("lmWriteRecord: UFO tlck:0x%p", tlck);
		return 0;	/* Probably should trap */
	}
	/* line size is taken from the first linelock and reused for the chain */
	l2linesize = linelock->l2linesize;

	/* re-entered once per linelock chained through linelock->next */
      moveData:
	ASSERT(linelock->index <= linelock->maxcnt);

	lv = linelock->lv;
	for (i = 0; i < linelock->index; i++, lv++) {
		/* skip empty log vectors */
		if (lv->length == 0)
			continue;

		/* is page full ? */
		if (dstoffset >= LOGPSIZE - LOGPTLRSIZE) {
			/* page become full: move on to next page */
			lmNextPage(log);

			bp = log->bp;
			lp = (struct logpage *) bp->l_ldata;
			dstoffset = LOGPHDRSIZE;
		}

		/*
		 * move log vector data
		 */
		/* lv offset/length are in lines; shift converts them to bytes */
		src = (u8 *) p + (lv->offset << l2linesize);
		srclen = lv->length << l2linesize;
		len += srclen;
		while (srclen > 0) {
			/* copy as much as fits in front of the page trailer */
			freespace = (LOGPSIZE - LOGPTLRSIZE) - dstoffset;
			nbytes = min(freespace, srclen);
			dst = (caddr_t) lp + dstoffset;
			memcpy(dst, src, nbytes);
			dstoffset += nbytes;

			/* is page not full ? */
			if (dstoffset < LOGPSIZE - LOGPTLRSIZE)
				break;

			/* page become full: move on to next page */
			lmNextPage(log);

			bp = (struct lbuf *) log->bp;
			lp = (struct logpage *) bp->l_ldata;
			dstoffset = LOGPHDRSIZE;

			/* continue with whatever did not fit */
			srclen -= nbytes;
			src += nbytes;
		}

		/*
		 * move log vector descriptor
		 */
		/* the descriptor takes 4 bytes: 16-bit offset + 16-bit length */
		len += 4;
		lvd = (struct lvd *) ((caddr_t) lp + dstoffset);
		lvd->offset = cpu_to_le16(lv->offset);
		lvd->length = cpu_to_le16(lv->length);
		dstoffset += 4;
		jfs_info("lmWriteRecord: lv offset:%d length:%d",
			 lv->offset, lv->length);
	}

	/* follow the chain to the next linelock, if any */
	if ((i = linelock->next)) {
		linelock = (struct linelock *) lid_to_tlock(i);
		goto moveData;
	}

	/*
	 *	move log record descriptor
	 */
      moveLrd:
	/* length of everything written above (0 when there was no data) */
	lrd->length = cpu_to_le16(len);

	src = (caddr_t) lrd;
	srclen = LOGRDSIZE;

	while (srclen > 0) {
		freespace = (LOGPSIZE - LOGPTLRSIZE) - dstoffset;
		nbytes = min(freespace, srclen);
		dst = (caddr_t) lp + dstoffset;
		memcpy(dst, src, nbytes);

		dstoffset += nbytes;
		srclen -= nbytes;

		/* are there more to move than freespace of page ? */
		if (srclen)
			goto pageFull;

		/*
		 * end of log record descriptor
		 */

		/* update last log record eor */
		log->eor = dstoffset;
		bp->l_eor = dstoffset;
		lsn = (log->page << L2LOGPSIZE) + dstoffset;

		if (lrd->type & cpu_to_le16(LOG_COMMIT)) {
			/* remember where this transaction's COMMIT ends */
			tblk->clsn = lsn;
			jfs_info("wr: tclsn:0x%x, beor:0x%x", tblk->clsn,
				 bp->l_eor);

			INCREMENT(lmStat.commit);	/* # of commit */

			/*
			 * enqueue tblock for group commit:
			 *
			 * enqueue tblock of non-trivial/synchronous COMMIT
			 * at tail of group commit queue
			 * (trivial/asynchronous COMMITs are ignored by
			 * group commit.)
			 */
			LOGGC_LOCK(log);

			/* init tblock gc state */
			tblk->flag = tblkGC_QUEUE;
			tblk->bp = log->bp;
			tblk->pn = log->page;
			tblk->eor = log->eor;

			/* enqueue transaction to commit queue */
			list_add_tail(&tblk->cqueue, &log->cqueue);

			LOGGC_UNLOCK(log);
		}

		jfs_info("lmWriteRecord: lrd:0x%04x bp:0x%p pn:%d eor:0x%x",
			le16_to_cpu(lrd->type), log->bp, log->page, dstoffset);

		/* page not full ? */
		if (dstoffset < LOGPSIZE - LOGPTLRSIZE)
			return lsn;

		/*
		 * Reached either by the goto above (descriptor only partly
		 * copied) or by falling through when the descriptor ended
		 * exactly at the end of the page.
		 */
	      pageFull:
		/* page become full: move on to next page */
		lmNextPage(log);

		bp = (struct lbuf *) log->bp;
		lp = (struct logpage *) bp->l_ldata;
		dstoffset = LOGPHDRSIZE;
		src += nbytes;
	}

	/* descriptor ended exactly at end of page; next page already started */
	return lsn;
}
539
540
541/*
542 * NAME:	lmNextPage()
543 *
544 * FUNCTION:	write current page and allocate next page.
545 *
546 * PARAMETER:	log
547 *
548 * RETURN:	0
549 *
550 * serialization: LOG_LOCK() held on entry/exit
551 */
552static int lmNextPage(struct jfs_log * log)
553{
554	struct logpage *lp;
555	int lspn;		/* log sequence page number */
556	int pn;			/* current page number */
557	struct lbuf *bp;
558	struct lbuf *nextbp;
559	struct tblock *tblk;
560
561	/* get current log page number and log sequence page number */
562	pn = log->page;
563	bp = log->bp;
564	lp = (struct logpage *) bp->l_ldata;
565	lspn = le32_to_cpu(lp->h.page);
566
567	LOGGC_LOCK(log);
568
569	/*
570	 *	write or queue the full page at the tail of write queue
571	 */
572	/* get the tail tblk on commit queue */
573	if (list_empty(&log->cqueue))
574		tblk = NULL;
575	else
576		tblk = list_entry(log->cqueue.prev, struct tblock, cqueue);
577
578	/* every tblk who has COMMIT record on the current page,
579	 * and has not been committed, must be on commit queue
580	 * since tblk is queued at commit queueu at the time
581	 * of writing its COMMIT record on the page before
582	 * page becomes full (even though the tblk thread
583	 * who wrote COMMIT record may have been suspended
584	 * currently);
585	 */
586
587	/* is page bound with outstanding tail tblk ? */
588	if (tblk && tblk->pn == pn) {
589		/* mark tblk for end-of-page */
590		tblk->flag |= tblkGC_EOP;
591
592		if (log->cflag & logGC_PAGEOUT) {
593			/* if page is not already on write queue,
594			 * just enqueue (no lbmWRITE to prevent redrive)
595			 * buffer to wqueue to ensure correct serial order
596			 * of the pages since log pages will be added
597			 * continuously
598			 */
599			if (bp->l_wqnext == NULL)
600				lbmWrite(log, bp, 0, 0);
601		} else {
602			/*
603			 * No current GC leader, initiate group commit
604			 */
605			log->cflag |= logGC_PAGEOUT;
606			lmGCwrite(log, 0);
607		}
608	}
609	/* page is not bound with outstanding tblk:
610	 * init write or mark it to be redriven (lbmWRITE)
611	 */
612	else {
613		/* finalize the page */
614		bp->l_ceor = bp->l_eor;
615		lp->h.eor = lp->t.eor = cpu_to_le16(bp->l_ceor);
616		lbmWrite(log, bp, lbmWRITE | lbmRELEASE | lbmFREE, 0);
617	}
618	LOGGC_UNLOCK(log);
619
620	/*
621	 *	allocate/initialize next page
622	 */
623	/* if log wraps, the first data page of log is 2
624	 * (0 never used, 1 is superblock).
625	 */
626	log->page = (pn == log->size - 1) ? 2 : pn + 1;
627	log->eor = LOGPHDRSIZE;	/* ? valid page empty/full at logRedo() */
628
629	/* allocate/initialize next log page buffer */
630	nextbp = lbmAllocate(log, log->page);
631	nextbp->l_eor = log->eor;
632	log->bp = nextbp;
633
634	/* initialize next log page */
635	lp = (struct logpage *) nextbp->l_ldata;
636	lp->h.page = lp->t.page = cpu_to_le32(lspn + 1);
637	lp->h.eor = lp->t.eor = cpu_to_le16(LOGPHDRSIZE);
638
639	return 0;
640}
641
642
643/*
644 * NAME:	lmGroupCommit()
645 *
646 * FUNCTION:	group commit
647 *	initiate pageout of the pages with COMMIT in the order of
648 *	page number - redrive pageout of the page at the head of
649 *	pageout queue until full page has been written.
650 *
651 * RETURN:
652 *
653 * NOTE:
654 *	LOGGC_LOCK serializes log group commit queue, and
655 *	transaction blocks on the commit queue.
656 *	N.B. LOG_LOCK is NOT held during lmGroupCommit().
657 */
658int lmGroupCommit(struct jfs_log * log, struct tblock * tblk)
659{
660	int rc = 0;
661
662	LOGGC_LOCK(log);
663
664	/* group committed already ? */
665	if (tblk->flag & tblkGC_COMMITTED) {
666		if (tblk->flag & tblkGC_ERROR)
667			rc = -EIO;
668
669		LOGGC_UNLOCK(log);
670		return rc;
671	}
672	jfs_info("lmGroup Commit: tblk = 0x%p, gcrtc = %d", tblk, log->gcrtc);
673
674	if (tblk->xflag & COMMIT_LAZY)
675		tblk->flag |= tblkGC_LAZY;
676
677	if ((!(log->cflag & logGC_PAGEOUT)) && (!list_empty(&log->cqueue)) &&
678	    (!(tblk->xflag & COMMIT_LAZY) || test_bit(log_FLUSH, &log->flag)
679	     || jfs_tlocks_low)) {
680		/*
681		 * No pageout in progress
682		 *
683		 * start group commit as its group leader.
684		 */
685		log->cflag |= logGC_PAGEOUT;
686
687		lmGCwrite(log, 0);
688	}
689
690	if (tblk->xflag & COMMIT_LAZY) {
691		/*
692		 * Lazy transactions can leave now
693		 */
694		LOGGC_UNLOCK(log);
695		return 0;
696	}
697
698	/* lmGCwrite gives up LOGGC_LOCK, check again */
699
700	if (tblk->flag & tblkGC_COMMITTED) {
701		if (tblk->flag & tblkGC_ERROR)
702			rc = -EIO;
703
704		LOGGC_UNLOCK(log);
705		return rc;
706	}
707
708	/* upcount transaction waiting for completion
709	 */
710	log->gcrtc++;
711	tblk->flag |= tblkGC_READY;
712
713	__SLEEP_COND(tblk->gcwait, (tblk->flag & tblkGC_COMMITTED),
714		     LOGGC_LOCK(log), LOGGC_UNLOCK(log));
715
716	/* removed from commit queue */
717	if (tblk->flag & tblkGC_ERROR)
718		rc = -EIO;
719
720	LOGGC_UNLOCK(log);
721	return rc;
722}
723
724/*
725 * NAME:	lmGCwrite()
726 *
727 * FUNCTION:	group commit write
728 *	initiate write of log page, building a group of all transactions
729 *	with commit records on that page.
730 *
731 * RETURN:	None
732 *
733 * NOTE:
734 *	LOGGC_LOCK must be held by caller.
735 *	N.B. LOG_LOCK is NOT held during lmGroupCommit().
736 */
737static void lmGCwrite(struct jfs_log * log, int cant_write)
738{
739	struct lbuf *bp;
740	struct logpage *lp;
741	int gcpn;		/* group commit page number */
742	struct tblock *tblk;
743	struct tblock *xtblk = NULL;
744
745	/*
746	 * build the commit group of a log page
747	 *
748	 * scan commit queue and make a commit group of all
749	 * transactions with COMMIT records on the same log page.
750	 */
751	/* get the head tblk on the commit queue */
752	gcpn = list_entry(log->cqueue.next, struct tblock, cqueue)->pn;
753
754	list_for_each_entry(tblk, &log->cqueue, cqueue) {
755		if (tblk->pn != gcpn)
756			break;
757
758		xtblk = tblk;
759
760		/* state transition: (QUEUE, READY) -> COMMIT */
761		tblk->flag |= tblkGC_COMMIT;
762	}
763	tblk = xtblk;		/* last tblk of the page */
764
765	/*
766	 * pageout to commit transactions on the log page.
767	 */
768	bp = (struct lbuf *) tblk->bp;
769	lp = (struct logpage *) bp->l_ldata;
770	/* is page already full ? */
771	if (tblk->flag & tblkGC_EOP) {
772		/* mark page to free at end of group commit of the page */
773		tblk->flag &= ~tblkGC_EOP;
774		tblk->flag |= tblkGC_FREE;
775		bp->l_ceor = bp->l_eor;
776		lp->h.eor = lp->t.eor = cpu_to_le16(bp->l_ceor);
777		lbmWrite(log, bp, lbmWRITE | lbmRELEASE | lbmGC,
778			 cant_write);
779		INCREMENT(lmStat.full_page);
780	}
781	/* page is not yet full */
782	else {
783		bp->l_ceor = tblk->eor;	/* ? bp->l_ceor = bp->l_eor; */
784		lp->h.eor = lp->t.eor = cpu_to_le16(bp->l_ceor);
785		lbmWrite(log, bp, lbmWRITE | lbmGC, cant_write);
786		INCREMENT(lmStat.partial_page);
787	}
788}
789
790/*
791 * NAME:	lmPostGC()
792 *
793 * FUNCTION:	group commit post-processing
794 *	Processes transactions after their commit records have been written
795 *	to disk, redriving log I/O if necessary.
796 *
797 * RETURN:	None
798 *
799 * NOTE:
800 *	This routine is called a interrupt time by lbmIODone
801 */
802static void lmPostGC(struct lbuf * bp)
803{
804	unsigned long flags;
805	struct jfs_log *log = bp->l_log;
806	struct logpage *lp;
807	struct tblock *tblk, *temp;
808
809	//LOGGC_LOCK(log);
810	spin_lock_irqsave(&log->gclock, flags);
811	/*
812	 * current pageout of group commit completed.
813	 *
814	 * remove/wakeup transactions from commit queue who were
815	 * group committed with the current log page
816	 */
817	list_for_each_entry_safe(tblk, temp, &log->cqueue, cqueue) {
818		if (!(tblk->flag & tblkGC_COMMIT))
819			break;
820		/* if transaction was marked GC_COMMIT then
821		 * it has been shipped in the current pageout
822		 * and made it to disk - it is committed.
823		 */
824
825		if (bp->l_flag & lbmERROR)
826			tblk->flag |= tblkGC_ERROR;
827
828		/* remove it from the commit queue */
829		list_del(&tblk->cqueue);
830		tblk->flag &= ~tblkGC_QUEUE;
831
832		if (tblk == log->flush_tblk) {
833			/* we can stop flushing the log now */
834			clear_bit(log_FLUSH, &log->flag);
835			log->flush_tblk = NULL;
836		}
837
838		jfs_info("lmPostGC: tblk = 0x%p, flag = 0x%x", tblk,
839			 tblk->flag);
840
841		if (!(tblk->xflag & COMMIT_FORCE))
842			/*
843			 * Hand tblk over to lazy commit thread
844			 */
845			txLazyUnlock(tblk);
846		else {
847			/* state transition: COMMIT -> COMMITTED */
848			tblk->flag |= tblkGC_COMMITTED;
849
850			if (tblk->flag & tblkGC_READY)
851				log->gcrtc--;
852
853			LOGGC_WAKEUP(tblk);
854		}
855
856		/* was page full before pageout ?
857		 * (and this is the last tblk bound with the page)
858		 */
859		if (tblk->flag & tblkGC_FREE)
860			lbmFree(bp);
861		/* did page become full after pageout ?
862		 * (and this is the last tblk bound with the page)
863		 */
864		else if (tblk->flag & tblkGC_EOP) {
865			/* finalize the page */
866			lp = (struct logpage *) bp->l_ldata;
867			bp->l_ceor = bp->l_eor;
868			lp->h.eor = lp->t.eor = cpu_to_le16(bp->l_eor);
869			jfs_info("lmPostGC: calling lbmWrite");
870			lbmWrite(log, bp, lbmWRITE | lbmRELEASE | lbmFREE,
871				 1);
872		}
873
874	}
875
876	/* are there any transactions who have entered lnGroupCommit()
877	 * (whose COMMITs are after that of the last log page written.
878	 * They are waiting for new group commit (above at (SLEEP 1))
879	 * or lazy transactions are on a full (queued) log page,
880	 * select the latest ready transaction as new group leader and
881	 * wake her up to lead her group.
882	 */
883	if ((!list_empty(&log->cqueue)) &&
884	    ((log->gcrtc > 0) || (tblk->bp->l_wqnext != NULL) ||
885	     test_bit(log_FLUSH, &log->flag) || jfs_tlocks_low))
886		/*
887		 * Call lmGCwrite with new group leader
888		 */
889		lmGCwrite(log, 1);
890
891	/* no transaction are ready yet (transactions are only just
892	 * queued (GC_QUEUE) and not entered for group commit yet).
893	 * the first transaction entering group commit
894	 * will elect herself as new group leader.
895	 */
896	else
897		log->cflag &= ~logGC_PAGEOUT;
898
899	//LOGGC_UNLOCK(log);
900	spin_unlock_irqrestore(&log->gclock, flags);
901	return;
902}
903
904/*
905 * NAME:	lmLogSync()
906 *
907 * FUNCTION:	write log SYNCPT record for specified log
908 *	if new sync address is available
909 *	(normally the case if sync() is executed by back-ground
910 *	process).
911 *	calculate new value of i_nextsync which determines when
912 *	this code is called again.
913 *
914 * PARAMETERS:	log	- log structure
915 *		hard_sync - 1 to force all metadata to be written
916 *
917 * RETURN:	0
918 *
919 * serialization: LOG_LOCK() held on entry/exit
920 */
921static int lmLogSync(struct jfs_log * log, int hard_sync)
922{
923	int logsize;
924	int written;		/* written since last syncpt */
925	int free;		/* free space left available */
926	int delta;		/* additional delta to write normally */
927	int more;		/* additional write granted */
928	struct lrd lrd;
929	int lsn;
930	struct logsyncblk *lp;
931	unsigned long flags;
932
933	/* push dirty metapages out to disk */
934	if (hard_sync)
935		write_special_inodes(log, filemap_fdatawrite);
936	else
937		write_special_inodes(log, filemap_flush);
938
939	/*
940	 *	forward syncpt
941	 */
942	/* if last sync is same as last syncpt,
943	 * invoke sync point forward processing to update sync.
944	 */
945
946	if (log->sync == log->syncpt) {
947		LOGSYNC_LOCK(log, flags);
948		if (list_empty(&log->synclist))
949			log->sync = log->lsn;
950		else {
951			lp = list_entry(log->synclist.next,
952					struct logsyncblk, synclist);
953			log->sync = lp->lsn;
954		}
955		LOGSYNC_UNLOCK(log, flags);
956
957	}
958
959	/* if sync is different from last syncpt,
960	 * write a SYNCPT record with syncpt = sync.
961	 * reset syncpt = sync
962	 */
963	if (log->sync != log->syncpt) {
964		lrd.logtid = 0;
965		lrd.backchain = 0;
966		lrd.type = cpu_to_le16(LOG_SYNCPT);
967		lrd.length = 0;
968		lrd.log.syncpt.sync = cpu_to_le32(log->sync);
969		lsn = lmWriteRecord(log, NULL, &lrd, NULL);
970
971		log->syncpt = log->sync;
972	} else
973		lsn = log->lsn;
974
975	/*
976	 *	setup next syncpt trigger (SWAG)
977	 */
978	logsize = log->logsize;
979
980	logdiff(written, lsn, log);
981	free = logsize - written;
982	delta = LOGSYNC_DELTA(logsize);
983	more = min(free / 2, delta);
984	if (more < 2 * LOGPSIZE) {
985		jfs_warn("\n ... Log Wrap ... Log Wrap ... Log Wrap ...\n");
986		/*
987		 *	log wrapping
988		 *
989		 * option 1 - panic ? No.!
990		 * option 2 - shutdown file systems
991		 *	      associated with log ?
992		 * option 3 - extend log ?
993		 * option 4 - second chance
994		 *
995		 * mark log wrapped, and continue.
996		 * when all active transactions are completed,
997		 * mark log valid for recovery.
998		 * if crashed during invalid state, log state
999		 * implies invalid log, forcing fsck().
1000		 */
1001		/* mark log state log wrap in log superblock */
1002		/* log->state = LOGWRAP; */
1003
1004		/* reset sync point computation */
1005		log->syncpt = log->sync = lsn;
1006		log->nextsync = delta;
1007	} else
1008		/* next syncpt trigger = written + more */
1009		log->nextsync = written + more;
1010
1011	/* if number of bytes written from last sync point is more
1012	 * than 1/4 of the log size, stop new transactions from
1013	 * starting until all current transactions are completed
1014	 * by setting syncbarrier flag.
1015	 */
1016	if (!test_bit(log_SYNCBARRIER, &log->flag) &&
1017	    (written > LOGSYNC_BARRIER(logsize)) && log->active) {
1018		set_bit(log_SYNCBARRIER, &log->flag);
1019		jfs_info("log barrier on: lsn=0x%x syncpt=0x%x", lsn,
1020			 log->syncpt);
1021		/*
1022		 * We may have to initiate group commit
1023		 */
1024		jfs_flush_journal(log, 0);
1025	}
1026
1027	return lsn;
1028}
1029
1030/*
1031 * NAME:	jfs_syncpt
1032 *
1033 * FUNCTION:	write log SYNCPT record for specified log
1034 *
1035 * PARAMETERS:	log	  - log structure
1036 *		hard_sync - set to 1 to force metadata to be written
1037 */
1038void jfs_syncpt(struct jfs_log *log, int hard_sync)
1039{	LOG_LOCK(log);
1040	if (!test_bit(log_QUIESCE, &log->flag))
1041		lmLogSync(log, hard_sync);
1042	LOG_UNLOCK(log);
1043}
1044
1045/*
1046 * NAME:	lmLogOpen()
1047 *
1048 * FUNCTION:	open the log on first open;
1049 *	insert filesystem in the active list of the log.
1050 *
1051 * PARAMETER:	ipmnt	- file system mount inode
1052 *		iplog	- log inode (out)
1053 *
1054 * RETURN:
1055 *
1056 * serialization:
1057 */
1058int lmLogOpen(struct super_block *sb)
1059{
1060	int rc;
1061	struct file *bdev_file;
1062	struct jfs_log *log;
1063	struct jfs_sb_info *sbi = JFS_SBI(sb);
1064
1065	if (sbi->flag & JFS_NOINTEGRITY)
1066		return open_dummy_log(sb);
1067
1068	if (sbi->mntflag & JFS_INLINELOG)
1069		return open_inline_log(sb);
1070
1071	mutex_lock(&jfs_log_mutex);
1072	list_for_each_entry(log, &jfs_external_logs, journal_list) {
1073		if (file_bdev(log->bdev_file)->bd_dev == sbi->logdev) {
1074			if (!uuid_equal(&log->uuid, &sbi->loguuid)) {
1075				jfs_warn("wrong uuid on JFS journal");
1076				mutex_unlock(&jfs_log_mutex);
1077				return -EINVAL;
1078			}
1079			/*
1080			 * add file system to log active file system list
1081			 */
1082			if ((rc = lmLogFileSystem(log, sbi, 1))) {
1083				mutex_unlock(&jfs_log_mutex);
1084				return rc;
1085			}
1086			goto journal_found;
1087		}
1088	}
1089
1090	if (!(log = kzalloc(sizeof(struct jfs_log), GFP_KERNEL))) {
1091		mutex_unlock(&jfs_log_mutex);
1092		return -ENOMEM;
1093	}
1094	INIT_LIST_HEAD(&log->sb_list);
1095	init_waitqueue_head(&log->syncwait);
1096
1097	/*
1098	 *	external log as separate logical volume
1099	 *
1100	 * file systems to log may have n-to-1 relationship;
1101	 */
1102
1103	bdev_file = bdev_file_open_by_dev(sbi->logdev,
1104			BLK_OPEN_READ | BLK_OPEN_WRITE, log, NULL);
1105	if (IS_ERR(bdev_file)) {
1106		rc = PTR_ERR(bdev_file);
1107		goto free;
1108	}
1109
1110	log->bdev_file = bdev_file;
1111	uuid_copy(&log->uuid, &sbi->loguuid);
1112
1113	/*
1114	 * initialize log:
1115	 */
1116	if ((rc = lmLogInit(log)))
1117		goto close;
1118
1119	list_add(&log->journal_list, &jfs_external_logs);
1120
1121	/*
1122	 * add file system to log active file system list
1123	 */
1124	if ((rc = lmLogFileSystem(log, sbi, 1)))
1125		goto shutdown;
1126
1127journal_found:
1128	LOG_LOCK(log);
1129	list_add(&sbi->log_list, &log->sb_list);
1130	sbi->log = log;
1131	LOG_UNLOCK(log);
1132
1133	mutex_unlock(&jfs_log_mutex);
1134	return 0;
1135
1136	/*
1137	 *	unwind on error
1138	 */
1139      shutdown:		/* unwind lbmLogInit() */
1140	list_del(&log->journal_list);
1141	lbmLogShutdown(log);
1142
1143      close:		/* close external log device */
1144	bdev_fput(bdev_file);
1145
1146      free:		/* free log descriptor */
1147	mutex_unlock(&jfs_log_mutex);
1148	kfree(log);
1149
1150	jfs_warn("lmLogOpen: exit(%d)", rc);
1151	return rc;
1152}
1153
1154static int open_inline_log(struct super_block *sb)
1155{
1156	struct jfs_log *log;
1157	int rc;
1158
1159	if (!(log = kzalloc(sizeof(struct jfs_log), GFP_KERNEL)))
1160		return -ENOMEM;
1161	INIT_LIST_HEAD(&log->sb_list);
1162	init_waitqueue_head(&log->syncwait);
1163
1164	set_bit(log_INLINELOG, &log->flag);
1165	log->bdev_file = sb->s_bdev_file;
1166	log->base = addressPXD(&JFS_SBI(sb)->logpxd);
1167	log->size = lengthPXD(&JFS_SBI(sb)->logpxd) >>
1168	    (L2LOGPSIZE - sb->s_blocksize_bits);
1169	log->l2bsize = sb->s_blocksize_bits;
1170	ASSERT(L2LOGPSIZE >= sb->s_blocksize_bits);
1171
1172	/*
1173	 * initialize log.
1174	 */
1175	if ((rc = lmLogInit(log))) {
1176		kfree(log);
1177		jfs_warn("lmLogOpen: exit(%d)", rc);
1178		return rc;
1179	}
1180
1181	list_add(&JFS_SBI(sb)->log_list, &log->sb_list);
1182	JFS_SBI(sb)->log = log;
1183
1184	return rc;
1185}
1186
1187static int open_dummy_log(struct super_block *sb)
1188{
1189	int rc;
1190
1191	mutex_lock(&jfs_log_mutex);
1192	if (!dummy_log) {
1193		dummy_log = kzalloc(sizeof(struct jfs_log), GFP_KERNEL);
1194		if (!dummy_log) {
1195			mutex_unlock(&jfs_log_mutex);
1196			return -ENOMEM;
1197		}
1198		INIT_LIST_HEAD(&dummy_log->sb_list);
1199		init_waitqueue_head(&dummy_log->syncwait);
1200		dummy_log->no_integrity = 1;
1201		/* Make up some stuff */
1202		dummy_log->base = 0;
1203		dummy_log->size = 1024;
1204		rc = lmLogInit(dummy_log);
1205		if (rc) {
1206			kfree(dummy_log);
1207			dummy_log = NULL;
1208			mutex_unlock(&jfs_log_mutex);
1209			return rc;
1210		}
1211	}
1212
1213	LOG_LOCK(dummy_log);
1214	list_add(&JFS_SBI(sb)->log_list, &dummy_log->sb_list);
1215	JFS_SBI(sb)->log = dummy_log;
1216	LOG_UNLOCK(dummy_log);
1217	mutex_unlock(&jfs_log_mutex);
1218
1219	return 0;
1220}
1221
1222/*
1223 * NAME:	lmLogInit()
1224 *
1225 * FUNCTION:	log initialization at first log open.
1226 *
1227 *	logredo() (or logformat()) should have been run previously.
1228 *	initialize the log from log superblock.
1229 *	set the log state in the superblock to LOGMOUNT and
1230 *	write SYNCPT log record.
1231 *
1232 * PARAMETER:	log	- log structure
1233 *
1234 * RETURN:	0	- if ok
1235 *		-EINVAL	- bad log magic number or superblock dirty
1236 *		error returned from logwait()
1237 *
1238 * serialization: single first open thread
1239 */
1240int lmLogInit(struct jfs_log * log)
1241{
1242	int rc = 0;
1243	struct lrd lrd;
1244	struct logsuper *logsuper;
1245	struct lbuf *bpsuper;
1246	struct lbuf *bp;
1247	struct logpage *lp;
1248	int lsn = 0;
1249
1250	jfs_info("lmLogInit: log:0x%p", log);
1251
1252	/* initialize the group commit serialization lock */
1253	LOGGC_LOCK_INIT(log);
1254
1255	/* allocate/initialize the log write serialization lock */
1256	LOG_LOCK_INIT(log);
1257
1258	LOGSYNC_LOCK_INIT(log);
1259
1260	INIT_LIST_HEAD(&log->synclist);
1261
1262	INIT_LIST_HEAD(&log->cqueue);
1263	log->flush_tblk = NULL;
1264
1265	log->count = 0;
1266
1267	/*
1268	 * initialize log i/o
1269	 */
1270	if ((rc = lbmLogInit(log)))
1271		return rc;
1272
1273	if (!test_bit(log_INLINELOG, &log->flag))
1274		log->l2bsize = L2LOGPSIZE;
1275
1276	/* check for disabled journaling to disk */
1277	if (log->no_integrity) {
1278		/*
1279		 * Journal pages will still be filled.  When the time comes
1280		 * to actually do the I/O, the write is not done, and the
1281		 * endio routine is called directly.
1282		 */
1283		bp = lbmAllocate(log , 0);
1284		log->bp = bp;
1285		bp->l_pn = bp->l_eor = 0;
1286	} else {
1287		/*
1288		 * validate log superblock
1289		 */
1290		if ((rc = lbmRead(log, 1, &bpsuper)))
1291			goto errout10;
1292
1293		logsuper = (struct logsuper *) bpsuper->l_ldata;
1294
1295		if (logsuper->magic != cpu_to_le32(LOGMAGIC)) {
1296			jfs_warn("*** Log Format Error ! ***");
1297			rc = -EINVAL;
1298			goto errout20;
1299		}
1300
1301		/* logredo() should have been run successfully. */
1302		if (logsuper->state != cpu_to_le32(LOGREDONE)) {
1303			jfs_warn("*** Log Is Dirty ! ***");
1304			rc = -EINVAL;
1305			goto errout20;
1306		}
1307
1308		/* initialize log from log superblock */
1309		if (test_bit(log_INLINELOG,&log->flag)) {
1310			if (log->size != le32_to_cpu(logsuper->size)) {
1311				rc = -EINVAL;
1312				goto errout20;
1313			}
1314			jfs_info("lmLogInit: inline log:0x%p base:0x%Lx size:0x%x",
1315				 log, (unsigned long long)log->base, log->size);
1316		} else {
1317			if (!uuid_equal(&logsuper->uuid, &log->uuid)) {
1318				jfs_warn("wrong uuid on JFS log device");
1319				rc = -EINVAL;
1320				goto errout20;
1321			}
1322			log->size = le32_to_cpu(logsuper->size);
1323			log->l2bsize = le32_to_cpu(logsuper->l2bsize);
1324			jfs_info("lmLogInit: external log:0x%p base:0x%Lx size:0x%x",
1325				 log, (unsigned long long)log->base, log->size);
1326		}
1327
1328		log->page = le32_to_cpu(logsuper->end) / LOGPSIZE;
1329		log->eor = le32_to_cpu(logsuper->end) - (LOGPSIZE * log->page);
1330
1331		/*
1332		 * initialize for log append write mode
1333		 */
1334		/* establish current/end-of-log page/buffer */
1335		if ((rc = lbmRead(log, log->page, &bp)))
1336			goto errout20;
1337
1338		lp = (struct logpage *) bp->l_ldata;
1339
1340		jfs_info("lmLogInit: lsn:0x%x page:%d eor:%d:%d",
1341			 le32_to_cpu(logsuper->end), log->page, log->eor,
1342			 le16_to_cpu(lp->h.eor));
1343
1344		log->bp = bp;
1345		bp->l_pn = log->page;
1346		bp->l_eor = log->eor;
1347
1348		/* if current page is full, move on to next page */
1349		if (log->eor >= LOGPSIZE - LOGPTLRSIZE)
1350			lmNextPage(log);
1351
1352		/*
1353		 * initialize log syncpoint
1354		 */
1355		/*
1356		 * write the first SYNCPT record with syncpoint = 0
1357		 * (i.e., log redo up to HERE !);
1358		 * remove current page from lbm write queue at end of pageout
1359		 * (to write log superblock update), but do not release to
1360		 * freelist;
1361		 */
1362		lrd.logtid = 0;
1363		lrd.backchain = 0;
1364		lrd.type = cpu_to_le16(LOG_SYNCPT);
1365		lrd.length = 0;
1366		lrd.log.syncpt.sync = 0;
1367		lsn = lmWriteRecord(log, NULL, &lrd, NULL);
1368		bp = log->bp;
1369		bp->l_ceor = bp->l_eor;
1370		lp = (struct logpage *) bp->l_ldata;
1371		lp->h.eor = lp->t.eor = cpu_to_le16(bp->l_eor);
1372		lbmWrite(log, bp, lbmWRITE | lbmSYNC, 0);
1373		if ((rc = lbmIOWait(bp, 0)))
1374			goto errout30;
1375
1376		/*
1377		 * update/write superblock
1378		 */
1379		logsuper->state = cpu_to_le32(LOGMOUNT);
1380		log->serial = le32_to_cpu(logsuper->serial) + 1;
1381		logsuper->serial = cpu_to_le32(log->serial);
1382		lbmDirectWrite(log, bpsuper, lbmWRITE | lbmRELEASE | lbmSYNC);
1383		if ((rc = lbmIOWait(bpsuper, lbmFREE)))
1384			goto errout30;
1385	}
1386
1387	/* initialize logsync parameters */
1388	log->logsize = (log->size - 2) << L2LOGPSIZE;
1389	log->lsn = lsn;
1390	log->syncpt = lsn;
1391	log->sync = log->syncpt;
1392	log->nextsync = LOGSYNC_DELTA(log->logsize);
1393
1394	jfs_info("lmLogInit: lsn:0x%x syncpt:0x%x sync:0x%x",
1395		 log->lsn, log->syncpt, log->sync);
1396
1397	/*
1398	 * initialize for lazy/group commit
1399	 */
1400	log->clsn = lsn;
1401
1402	return 0;
1403
1404	/*
1405	 *	unwind on error
1406	 */
1407      errout30:		/* release log page */
1408	log->wqueue = NULL;
1409	bp->l_wqnext = NULL;
1410	lbmFree(bp);
1411
1412      errout20:		/* release log superblock */
1413	lbmFree(bpsuper);
1414
1415      errout10:		/* unwind lbmLogInit() */
1416	lbmLogShutdown(log);
1417
1418	jfs_warn("lmLogInit: exit(%d)", rc);
1419	return rc;
1420}
1421
1422
1423/*
1424 * NAME:	lmLogClose()
1425 *
1426 * FUNCTION:	remove file system <ipmnt> from active list of log <iplog>
1427 *		and close it on last close.
1428 *
1429 * PARAMETER:	sb	- superblock
1430 *
1431 * RETURN:	errors from subroutines
1432 *
1433 * serialization:
1434 */
1435int lmLogClose(struct super_block *sb)
1436{
1437	struct jfs_sb_info *sbi = JFS_SBI(sb);
1438	struct jfs_log *log = sbi->log;
1439	struct file *bdev_file;
1440	int rc = 0;
1441
1442	jfs_info("lmLogClose: log:0x%p", log);
1443
1444	mutex_lock(&jfs_log_mutex);
1445	LOG_LOCK(log);
1446	list_del(&sbi->log_list);
1447	LOG_UNLOCK(log);
1448	sbi->log = NULL;
1449
1450	/*
1451	 * We need to make sure all of the "written" metapages
1452	 * actually make it to disk
1453	 */
1454	sync_blockdev(sb->s_bdev);
1455
1456	if (test_bit(log_INLINELOG, &log->flag)) {
1457		/*
1458		 *	in-line log in host file system
1459		 */
1460		rc = lmLogShutdown(log);
1461		kfree(log);
1462		goto out;
1463	}
1464
1465	if (!log->no_integrity)
1466		lmLogFileSystem(log, sbi, 0);
1467
1468	if (!list_empty(&log->sb_list))
1469		goto out;
1470
1471	/*
1472	 * TODO: ensure that the dummy_log is in a state to allow
1473	 * lbmLogShutdown to deallocate all the buffers and call
1474	 * kfree against dummy_log.  For now, leave dummy_log & its
1475	 * buffers in memory, and resuse if another no-integrity mount
1476	 * is requested.
1477	 */
1478	if (log->no_integrity)
1479		goto out;
1480
1481	/*
1482	 *	external log as separate logical volume
1483	 */
1484	list_del(&log->journal_list);
1485	bdev_file = log->bdev_file;
1486	rc = lmLogShutdown(log);
1487
1488	bdev_fput(bdev_file);
1489
1490	kfree(log);
1491
1492      out:
1493	mutex_unlock(&jfs_log_mutex);
1494	jfs_info("lmLogClose: exit(%d)", rc);
1495	return rc;
1496}
1497
1498
1499/*
1500 * NAME:	jfs_flush_journal()
1501 *
1502 * FUNCTION:	initiate write of any outstanding transactions to the journal
1503 *		and optionally wait until they are all written to disk
1504 *
1505 *		wait == 0  flush until latest txn is committed, don't wait
1506 *		wait == 1  flush until latest txn is committed, wait
1507 *		wait > 1   flush until all txn's are complete, wait
1508 */
void jfs_flush_journal(struct jfs_log *log, int wait)
{
	int i;
	struct tblock *target = NULL;

	/* jfs_write_inode may call us during read-only mount */
	if (!log)
		return;

	jfs_info("jfs_flush_journal: log:0x%p wait=%d", log, wait);

	LOGGC_LOCK(log);

	if (!list_empty(&log->cqueue)) {
		/*
		 * This ensures that we will keep writing to the journal as long
		 * as there are unwritten commit records
		 */
		/* target is the last (most recently queued) tblock on cqueue */
		target = list_entry(log->cqueue.prev, struct tblock, cqueue);

		if (test_bit(log_FLUSH, &log->flag)) {
			/*
			 * We're already flushing.
			 * if flush_tblk is NULL, we are flushing everything,
			 * so leave it that way.  Otherwise, update it to the
			 * latest transaction
			 */
			if (log->flush_tblk)
				log->flush_tblk = target;
		} else {
			/* Only flush until latest transaction is committed */
			log->flush_tblk = target;
			set_bit(log_FLUSH, &log->flag);

			/*
			 * Initiate I/O on outstanding transactions
			 */
			/* start a group-commit pageout unless one is already running */
			if (!(log->cflag & logGC_PAGEOUT)) {
				log->cflag |= logGC_PAGEOUT;
				lmGCwrite(log, 0);
			}
		}
	}
	if ((wait > 1) || test_bit(log_SYNCBARRIER, &log->flag)) {
		/* Flush until all activity complete */
		set_bit(log_FLUSH, &log->flag);
		log->flush_tblk = NULL;
	}

	/*
	 * Sleep once on the target's gcwait queue if it is not committed yet.
	 * The task state is set before LOGGC_LOCK is dropped; the committed
	 * flag is not re-checked after wakeup.
	 */
	if (wait && target && !(target->flag & tblkGC_COMMITTED)) {
		DECLARE_WAITQUEUE(__wait, current);

		add_wait_queue(&target->gcwait, &__wait);
		set_current_state(TASK_UNINTERRUPTIBLE);
		LOGGC_UNLOCK(log);
		schedule();
		LOGGC_LOCK(log);
		remove_wait_queue(&target->gcwait, &__wait);
	}
	LOGGC_UNLOCK(log);

	/* wait == 0 or 1: done once the latest commit has been handled */
	if (wait < 2)
		return;

	write_special_inodes(log, filemap_fdatawrite);

	/*
	 * If there was recent activity, we may need to wait
	 * for the lazycommit thread to catch up
	 */
	/* poll every 250ms, at most 200 times (about 50 seconds in total) */
	if ((!list_empty(&log->cqueue)) || !list_empty(&log->synclist)) {
		for (i = 0; i < 200; i++) {	/* Too much? */
			msleep(250);
			write_special_inodes(log, filemap_fdatawrite);
			if (list_empty(&log->cqueue) &&
			    list_empty(&log->synclist))
				break;
		}
	}
	assert(list_empty(&log->cqueue));

#ifdef CONFIG_JFS_DEBUG
	/* debug builds dump every entry still on the synclist */
	if (!list_empty(&log->synclist)) {
		struct logsyncblk *lp;

		printk(KERN_ERR "jfs_flush_journal: synclist not empty\n");
		list_for_each_entry(lp, &log->synclist, synclist) {
			if (lp->xflag & COMMIT_PAGE) {
				struct metapage *mp = (struct metapage *)lp;
				print_hex_dump(KERN_ERR, "metapage: ",
					       DUMP_PREFIX_ADDRESS, 16, 4,
					       mp, sizeof(struct metapage), 0);
				print_hex_dump(KERN_ERR, "page: ",
					       DUMP_PREFIX_ADDRESS, 16,
					       sizeof(long), mp->page,
					       sizeof(struct page), 0);
			} else
				print_hex_dump(KERN_ERR, "tblock:",
					       DUMP_PREFIX_ADDRESS, 16, 4,
					       lp, sizeof(struct tblock), 0);
		}
	}
#else
	WARN_ON(!list_empty(&log->synclist));
#endif
	/* the full flush is over: leave flush mode */
	clear_bit(log_FLUSH, &log->flag);
}
1616
1617/*
1618 * NAME:	lmLogShutdown()
1619 *
1620 * FUNCTION:	log shutdown at last LogClose().
1621 *
1622 *		write log syncpt record.
1623 *		update super block to set redone flag to 0.
1624 *
1625 * PARAMETER:	log	- log inode
1626 *
1627 * RETURN:	0	- success
1628 *
1629 * serialization: single last close thread
1630 */
1631int lmLogShutdown(struct jfs_log * log)
1632{
1633	int rc;
1634	struct lrd lrd;
1635	int lsn;
1636	struct logsuper *logsuper;
1637	struct lbuf *bpsuper;
1638	struct lbuf *bp;
1639	struct logpage *lp;
1640
1641	jfs_info("lmLogShutdown: log:0x%p", log);
1642
1643	jfs_flush_journal(log, 2);
1644
1645	/*
1646	 * write the last SYNCPT record with syncpoint = 0
1647	 * (i.e., log redo up to HERE !)
1648	 */
1649	lrd.logtid = 0;
1650	lrd.backchain = 0;
1651	lrd.type = cpu_to_le16(LOG_SYNCPT);
1652	lrd.length = 0;
1653	lrd.log.syncpt.sync = 0;
1654
1655	lsn = lmWriteRecord(log, NULL, &lrd, NULL);
1656	bp = log->bp;
1657	lp = (struct logpage *) bp->l_ldata;
1658	lp->h.eor = lp->t.eor = cpu_to_le16(bp->l_eor);
1659	lbmWrite(log, log->bp, lbmWRITE | lbmRELEASE | lbmSYNC, 0);
1660	lbmIOWait(log->bp, lbmFREE);
1661	log->bp = NULL;
1662
1663	/*
1664	 * synchronous update log superblock
1665	 * mark log state as shutdown cleanly
1666	 * (i.e., Log does not need to be replayed).
1667	 */
1668	if ((rc = lbmRead(log, 1, &bpsuper)))
1669		goto out;
1670
1671	logsuper = (struct logsuper *) bpsuper->l_ldata;
1672	logsuper->state = cpu_to_le32(LOGREDONE);
1673	logsuper->end = cpu_to_le32(lsn);
1674	lbmDirectWrite(log, bpsuper, lbmWRITE | lbmRELEASE | lbmSYNC);
1675	rc = lbmIOWait(bpsuper, lbmFREE);
1676
1677	jfs_info("lmLogShutdown: lsn:0x%x page:%d eor:%d",
1678		 lsn, log->page, log->eor);
1679
1680      out:
1681	/*
1682	 * shutdown per log i/o
1683	 */
1684	lbmLogShutdown(log);
1685
1686	if (rc) {
1687		jfs_warn("lmLogShutdown: exit(%d)", rc);
1688	}
1689	return rc;
1690}
1691
1692
1693/*
1694 * NAME:	lmLogFileSystem()
1695 *
1696 * FUNCTION:	insert (<activate> = true)/remove (<activate> = false)
1697 *	file system into/from log active file system list.
1698 *
 * PARAMETER:	log	- log structure
 *		sbi	- jfs_sb_info of the file system; its uuid is the
 *			  entry stored in the active list
 *		activate - insert/remove file system into/from active list.
 *
 * RETURN:	0	- success
 *		-EMFILE	- activate: no free slot in the active list
 *		-EIO	- deactivate: uuid not found in the active list
 *		errors returned by lbmRead() or lbmIOWait().
1706 */
1707static int lmLogFileSystem(struct jfs_log * log, struct jfs_sb_info *sbi,
1708			   int activate)
1709{
1710	int rc = 0;
1711	int i;
1712	struct logsuper *logsuper;
1713	struct lbuf *bpsuper;
1714	uuid_t *uuid = &sbi->uuid;
1715
1716	/*
1717	 * insert/remove file system device to log active file system list.
1718	 */
1719	if ((rc = lbmRead(log, 1, &bpsuper)))
1720		return rc;
1721
1722	logsuper = (struct logsuper *) bpsuper->l_ldata;
1723	if (activate) {
1724		for (i = 0; i < MAX_ACTIVE; i++)
1725			if (uuid_is_null(&logsuper->active[i].uuid)) {
1726				uuid_copy(&logsuper->active[i].uuid, uuid);
1727				sbi->aggregate = i;
1728				break;
1729			}
1730		if (i == MAX_ACTIVE) {
1731			jfs_warn("Too many file systems sharing journal!");
1732			lbmFree(bpsuper);
1733			return -EMFILE;	/* Is there a better rc? */
1734		}
1735	} else {
1736		for (i = 0; i < MAX_ACTIVE; i++)
1737			if (uuid_equal(&logsuper->active[i].uuid, uuid)) {
1738				uuid_copy(&logsuper->active[i].uuid,
1739					  &uuid_null);
1740				break;
1741			}
1742		if (i == MAX_ACTIVE) {
1743			jfs_warn("Somebody stomped on the journal!");
1744			lbmFree(bpsuper);
1745			return -EIO;
1746		}
1747
1748	}
1749
1750	/*
1751	 * synchronous write log superblock:
1752	 *
1753	 * write sidestream bypassing write queue:
1754	 * at file system mount, log super block is updated for
1755	 * activation of the file system before any log record
1756	 * (MOUNT record) of the file system, and at file system
1757	 * unmount, all meta data for the file system has been
1758	 * flushed before log super block is updated for deactivation
1759	 * of the file system.
1760	 */
1761	lbmDirectWrite(log, bpsuper, lbmWRITE | lbmRELEASE | lbmSYNC);
1762	rc = lbmIOWait(bpsuper, lbmFREE);
1763
1764	return rc;
1765}
1766
1767/*
1768 *		log buffer manager (lbm)
1769 *		------------------------
1770 *
1771 * special purpose buffer manager supporting log i/o requirements.
1772 *
1773 * per log write queue:
 * log pageout occurs in serial order by fifo write queue and
 * restricting to a single i/o in progress at any one time.
 * a circular singly-linked list
 * (log->wqueue points to the tail, and buffers are linked via
 * bp->l_wqnext field), and
 * maintains log page in pageout or waiting for pageout in serial pageout.
1780 */
1781
1782/*
1783 *	lbmLogInit()
1784 *
1785 * initialize per log I/O setup at lmLogInit()
1786 */
1787static int lbmLogInit(struct jfs_log * log)
1788{				/* log inode */
1789	int i;
1790	struct lbuf *lbuf;
1791
1792	jfs_info("lbmLogInit: log:0x%p", log);
1793
1794	/* initialize current buffer cursor */
1795	log->bp = NULL;
1796
1797	/* initialize log device write queue */
1798	log->wqueue = NULL;
1799
1800	/*
1801	 * Each log has its own buffer pages allocated to it.  These are
1802	 * not managed by the page cache.  This ensures that a transaction
1803	 * writing to the log does not block trying to allocate a page from
1804	 * the page cache (for the log).  This would be bad, since page
1805	 * allocation waits on the kswapd thread that may be committing inodes
1806	 * which would cause log activity.  Was that clear?  I'm trying to
1807	 * avoid deadlock here.
1808	 */
1809	init_waitqueue_head(&log->free_wait);
1810
1811	log->lbuf_free = NULL;
1812
1813	for (i = 0; i < LOGPAGES;) {
1814		char *buffer;
1815		uint offset;
1816		struct page *page = alloc_page(GFP_KERNEL | __GFP_ZERO);
1817
1818		if (!page)
1819			goto error;
1820		buffer = page_address(page);
1821		for (offset = 0; offset < PAGE_SIZE; offset += LOGPSIZE) {
1822			lbuf = kmalloc(sizeof(struct lbuf), GFP_KERNEL);
1823			if (lbuf == NULL) {
1824				if (offset == 0)
1825					__free_page(page);
1826				goto error;
1827			}
1828			if (offset) /* we already have one reference */
1829				get_page(page);
1830			lbuf->l_offset = offset;
1831			lbuf->l_ldata = buffer + offset;
1832			lbuf->l_page = page;
1833			lbuf->l_log = log;
1834			init_waitqueue_head(&lbuf->l_ioevent);
1835
1836			lbuf->l_freelist = log->lbuf_free;
1837			log->lbuf_free = lbuf;
1838			i++;
1839		}
1840	}
1841
1842	return (0);
1843
1844      error:
1845	lbmLogShutdown(log);
1846	return -ENOMEM;
1847}
1848
1849
1850/*
1851 *	lbmLogShutdown()
1852 *
1853 * finalize per log I/O setup at lmLogShutdown()
1854 */
1855static void lbmLogShutdown(struct jfs_log * log)
1856{
1857	struct lbuf *lbuf;
1858
1859	jfs_info("lbmLogShutdown: log:0x%p", log);
1860
1861	lbuf = log->lbuf_free;
1862	while (lbuf) {
1863		struct lbuf *next = lbuf->l_freelist;
1864		__free_page(lbuf->l_page);
1865		kfree(lbuf);
1866		lbuf = next;
1867	}
1868}
1869
1870
1871/*
1872 *	lbmAllocate()
1873 *
1874 * allocate an empty log buffer
1875 */
1876static struct lbuf *lbmAllocate(struct jfs_log * log, int pn)
1877{
1878	struct lbuf *bp;
1879	unsigned long flags;
1880
1881	/*
1882	 * recycle from log buffer freelist if any
1883	 */
1884	LCACHE_LOCK(flags);
1885	LCACHE_SLEEP_COND(log->free_wait, (bp = log->lbuf_free), flags);
1886	log->lbuf_free = bp->l_freelist;
1887	LCACHE_UNLOCK(flags);
1888
1889	bp->l_flag = 0;
1890
1891	bp->l_wqnext = NULL;
1892	bp->l_freelist = NULL;
1893
1894	bp->l_pn = pn;
1895	bp->l_blkno = log->base + (pn << (L2LOGPSIZE - log->l2bsize));
1896	bp->l_ceor = 0;
1897
1898	return bp;
1899}
1900
1901
1902/*
1903 *	lbmFree()
1904 *
1905 * release a log buffer to freelist
1906 */
/* Locked wrapper: takes LCACHE_LOCK around lbmfree(). */
static void lbmFree(struct lbuf * bp)
{
	unsigned long irqflags;

	LCACHE_LOCK(irqflags);
	lbmfree(bp);
	LCACHE_UNLOCK(irqflags);
}
1917
1918static void lbmfree(struct lbuf * bp)
1919{
1920	struct jfs_log *log = bp->l_log;
1921
1922	assert(bp->l_wqnext == NULL);
1923
1924	/*
1925	 * return the buffer to head of freelist
1926	 */
1927	bp->l_freelist = log->lbuf_free;
1928	log->lbuf_free = bp;
1929
1930	wake_up(&log->free_wait);
1931	return;
1932}
1933
1934
1935/*
1936 * NAME:	lbmRedrive
1937 *
1938 * FUNCTION:	add a log buffer to the log redrive list
1939 *
1940 * PARAMETER:
1941 *	bp	- log buffer
1942 *
1943 * NOTES:
1944 *	Takes log_redrive_lock.
1945 */
1946static inline void lbmRedrive(struct lbuf *bp)
1947{
1948	unsigned long flags;
1949
1950	spin_lock_irqsave(&log_redrive_lock, flags);
1951	bp->l_redrive_next = log_redrive_list;
1952	log_redrive_list = bp;
1953	spin_unlock_irqrestore(&log_redrive_lock, flags);
1954
1955	wake_up_process(jfsIOthread);
1956}
1957
1958
1959/*
1960 *	lbmRead()
1961 */
1962static int lbmRead(struct jfs_log * log, int pn, struct lbuf ** bpp)
1963{
1964	struct bio *bio;
1965	struct lbuf *bp;
1966
1967	/*
1968	 * allocate a log buffer
1969	 */
1970	*bpp = bp = lbmAllocate(log, pn);
1971	jfs_info("lbmRead: bp:0x%p pn:0x%x", bp, pn);
1972
1973	bp->l_flag |= lbmREAD;
1974
1975	bio = bio_alloc(file_bdev(log->bdev_file), 1, REQ_OP_READ, GFP_NOFS);
1976	bio->bi_iter.bi_sector = bp->l_blkno << (log->l2bsize - 9);
1977	__bio_add_page(bio, bp->l_page, LOGPSIZE, bp->l_offset);
1978	BUG_ON(bio->bi_iter.bi_size != LOGPSIZE);
1979
1980	bio->bi_end_io = lbmIODone;
1981	bio->bi_private = bp;
1982	/*check if journaling to disk has been disabled*/
1983	if (log->no_integrity) {
1984		bio->bi_iter.bi_size = 0;
1985		lbmIODone(bio);
1986	} else {
1987		submit_bio(bio);
1988	}
1989
1990	wait_event(bp->l_ioevent, (bp->l_flag != lbmREAD));
1991
1992	return 0;
1993}
1994
1995
1996/*
1997 *	lbmWrite()
1998 *
1999 * buffer at head of pageout queue stays after completion of
2000 * partial-page pageout and redriven by explicit initiation of
2001 * pageout by caller until full-page pageout is completed and
2002 * released.
2003 *
2004 * device driver i/o done redrives pageout of new buffer at
2005 * head of pageout queue when current buffer at head of pageout
2006 * queue is released at the completion of its full-page pageout.
2007 *
2008 * LOGGC_LOCK() serializes lbmWrite() by lmNextPage() and lmGroupCommit().
2009 * LCACHE_LOCK() serializes xflag between lbmWrite() and lbmIODone()
2010 */
/*
 * Queue bp on the write queue of log (unless it is already queued) and,
 * if bp is at the head of the queue and flag contains lbmWRITE, start its
 * pageout.  With cant_block set the I/O is handed to lbmRedrive() instead
 * of being started from this context.
 */
static void lbmWrite(struct jfs_log * log, struct lbuf * bp, int flag,
		     int cant_block)
{
	struct lbuf *tail;
	unsigned long flags;

	jfs_info("lbmWrite: bp:0x%p flag:0x%x pn:0x%x", bp, flag, bp->l_pn);

	/* map the logical block address to physical block address */
	bp->l_blkno =
	    log->base + (bp->l_pn << (L2LOGPSIZE - log->l2bsize));

	LCACHE_LOCK(flags);		/* disable+lock */

	/*
	 * initialize buffer for device driver
	 */
	bp->l_flag = flag;

	/*
	 *	insert bp at tail of write queue associated with log
	 *
	 * (request is either for bp already/currently at head of queue
	 * or new bp to be inserted at tail)
	 */
	tail = log->wqueue;

	/* is buffer not already on write queue ? */
	if (bp->l_wqnext == NULL) {
		/* insert at tail of wqueue */
		if (tail == NULL) {
			/* empty queue: bp becomes both head and tail */
			log->wqueue = bp;
			bp->l_wqnext = bp;
		} else {
			/* circular list: the new tail links back to the head */
			log->wqueue = bp;
			bp->l_wqnext = tail->l_wqnext;
			tail->l_wqnext = bp;
		}

		tail = bp;
	}

	/* is buffer at head of wqueue and for write ? */
	/* (tail->l_wqnext is the head of the circular queue) */
	if ((bp != tail->l_wqnext) || !(flag & lbmWRITE)) {
		LCACHE_UNLOCK(flags);	/* unlock+enable */
		return;
	}

	LCACHE_UNLOCK(flags);	/* unlock+enable */

	if (cant_block)
		lbmRedrive(bp);
	else if (flag & lbmSYNC)
		lbmStartIO(bp);
	else {
		/*
		 * LOGGC_LOCK is dropped around the I/O start and re-taken
		 * afterwards, so the caller is expected to hold it on this
		 * path.
		 */
		LOGGC_UNLOCK(log);
		lbmStartIO(bp);
		LOGGC_LOCK(log);
	}
}
2071
2072
2073/*
2074 *	lbmDirectWrite()
2075 *
2076 * initiate pageout bypassing write queue for sidestream
2077 * (e.g., log superblock) write;
2078 */
2079static void lbmDirectWrite(struct jfs_log * log, struct lbuf * bp, int flag)
2080{
2081	jfs_info("lbmDirectWrite: bp:0x%p flag:0x%x pn:0x%x",
2082		 bp, flag, bp->l_pn);
2083
2084	/*
2085	 * initialize buffer for device driver
2086	 */
2087	bp->l_flag = flag | lbmDIRECT;
2088
2089	/* map the logical block address to physical block address */
2090	bp->l_blkno =
2091	    log->base + (bp->l_pn << (L2LOGPSIZE - log->l2bsize));
2092
2093	/*
2094	 *	initiate pageout of the page
2095	 */
2096	lbmStartIO(bp);
2097}
2098
2099
2100/*
2101 * NAME:	lbmStartIO()
2102 *
2103 * FUNCTION:	Interface to DD strategy routine
2104 *
2105 * RETURN:	none
2106 *
2107 * serialization: LCACHE_LOCK() is NOT held during log i/o;
2108 */
2109static void lbmStartIO(struct lbuf * bp)
2110{
2111	struct bio *bio;
2112	struct jfs_log *log = bp->l_log;
2113	struct block_device *bdev = NULL;
2114
2115	jfs_info("lbmStartIO");
2116
2117	if (!log->no_integrity)
2118		bdev = file_bdev(log->bdev_file);
2119
2120	bio = bio_alloc(bdev, 1, REQ_OP_WRITE | REQ_SYNC,
2121			GFP_NOFS);
2122	bio->bi_iter.bi_sector = bp->l_blkno << (log->l2bsize - 9);
2123	__bio_add_page(bio, bp->l_page, LOGPSIZE, bp->l_offset);
2124	BUG_ON(bio->bi_iter.bi_size != LOGPSIZE);
2125
2126	bio->bi_end_io = lbmIODone;
2127	bio->bi_private = bp;
2128
2129	/* check if journaling to disk has been disabled */
2130	if (log->no_integrity) {
2131		bio->bi_iter.bi_size = 0;
2132		lbmIODone(bio);
2133	} else {
2134		submit_bio(bio);
2135		INCREMENT(lmStat.submitted);
2136	}
2137}
2138
2139
2140/*
2141 *	lbmIOWait()
2142 */
2143static int lbmIOWait(struct lbuf * bp, int flag)
2144{
2145	unsigned long flags;
2146	int rc = 0;
2147
2148	jfs_info("lbmIOWait1: bp:0x%p flag:0x%x:0x%x", bp, bp->l_flag, flag);
2149
2150	LCACHE_LOCK(flags);		/* disable+lock */
2151
2152	LCACHE_SLEEP_COND(bp->l_ioevent, (bp->l_flag & lbmDONE), flags);
2153
2154	rc = (bp->l_flag & lbmERROR) ? -EIO : 0;
2155
2156	if (flag & lbmFREE)
2157		lbmfree(bp);
2158
2159	LCACHE_UNLOCK(flags);	/* unlock+enable */
2160
2161	jfs_info("lbmIOWait2: bp:0x%p flag:0x%x:0x%x", bp, bp->l_flag, flag);
2162	return rc;
2163}
2164
2165/*
2166 *	lbmIODone()
2167 *
2168 * executed at INTIODONE level
2169 */
/*
 * bio completion handler for log buffer I/O (installed as bi_end_io by
 * lbmRead() and lbmStartIO(), which also call it directly for
 * no-integrity logs).  Marks the buffer lbmDONE (and lbmERROR on
 * failure), drops the bio, then finishes the request according to the
 * buffer flags: read, direct write, or queued write (sync, group commit
 * or asynchronous).
 */
static void lbmIODone(struct bio *bio)
{
	struct lbuf *bp = bio->bi_private;
	struct lbuf *nextbp, *tail;
	struct jfs_log *log;
	unsigned long flags;

	/*
	 * get back jfs buffer bound to the i/o buffer
	 */
	jfs_info("lbmIODone: bp:0x%p flag:0x%x", bp, bp->l_flag);

	LCACHE_LOCK(flags);		/* disable+lock */

	bp->l_flag |= lbmDONE;

	if (bio->bi_status) {
		bp->l_flag |= lbmERROR;

		jfs_err("lbmIODone: I/O error in JFS log");
	}

	/* the bio is not used past this point */
	bio_put(bio);

	/*
	 *	pagein completion
	 */
	if (bp->l_flag & lbmREAD) {
		bp->l_flag &= ~lbmREAD;

		LCACHE_UNLOCK(flags);	/* unlock+enable */

		/* wakeup I/O initiator */
		LCACHE_WAKEUP(&bp->l_ioevent);

		return;
	}

	/*
	 *	pageout completion
	 *
	 * the bp at the head of write queue has completed pageout.
	 *
	 * if single-commit/full-page pageout, remove the current buffer
	 * from head of pageout queue, and redrive pageout with
	 * the new buffer at head of pageout queue;
	 * otherwise, the partial-page pageout buffer stays at
	 * the head of pageout queue to be redriven for pageout
	 * by lmGroupCommit() until full-page pageout is completed.
	 */
	bp->l_flag &= ~lbmWRITE;
	INCREMENT(lmStat.pagedone);

	/* update committed lsn */
	log = bp->l_log;
	log->clsn = (bp->l_pn << L2LOGPSIZE) + bp->l_ceor;

	/*
	 * direct (sidestream) write: the buffer was never queued, so only
	 * wake the initiator; here the wakeup is issued before the unlock
	 */
	if (bp->l_flag & lbmDIRECT) {
		LCACHE_WAKEUP(&bp->l_ioevent);
		LCACHE_UNLOCK(flags);
		return;
	}

	tail = log->wqueue;

	/* single element queue */
	if (bp == tail) {
		/* remove head buffer of full-page pageout
		 * from log device write queue
		 */
		if (bp->l_flag & lbmRELEASE) {
			log->wqueue = NULL;
			bp->l_wqnext = NULL;
		}
	}
	/* multi element queue */
	else {
		/* remove head buffer of full-page pageout
		 * from log device write queue
		 */
		if (bp->l_flag & lbmRELEASE) {
			/* unlink bp: its successor becomes the new head */
			nextbp = tail->l_wqnext = bp->l_wqnext;
			bp->l_wqnext = NULL;

			/*
			 * redrive pageout of next page at head of write queue:
			 * redrive next page without any bound tblk
			 * (i.e., page w/o any COMMIT records), or
			 * first page of new group commit which has been
			 * queued after current page (subsequent pageout
			 * is performed synchronously, except page without
			 * any COMMITs) by lmGroupCommit() as indicated
			 * by lbmWRITE flag;
			 */
			if (nextbp->l_flag & lbmWRITE) {
				/*
				 * We can't do the I/O at interrupt time.
				 * The jfsIO thread can do it
				 */
				lbmRedrive(nextbp);
			}
		}
	}

	/*
	 *	synchronous pageout:
	 *
	 * buffer has not necessarily been removed from write queue
	 * (e.g., synchronous write of partial-page with COMMIT):
	 * leave buffer for i/o initiator to dispose
	 */
	if (bp->l_flag & lbmSYNC) {
		LCACHE_UNLOCK(flags);	/* unlock+enable */

		/* wakeup I/O initiator */
		LCACHE_WAKEUP(&bp->l_ioevent);
	}

	/*
	 *	Group Commit pageout:
	 */
	else if (bp->l_flag & lbmGC) {
		/* lmPostGC() is called with LCACHE_LOCK released */
		LCACHE_UNLOCK(flags);
		lmPostGC(bp);
	}

	/*
	 *	asynchronous pageout:
	 *
	 * buffer must have been removed from write queue:
	 * insert buffer at head of freelist where it can be recycled
	 */
	else {
		assert(bp->l_flag & lbmRELEASE);
		assert(bp->l_flag & lbmFREE);
		lbmfree(bp);

		LCACHE_UNLOCK(flags);	/* unlock+enable */
	}
}
2310
2311int jfsIOWait(void *arg)
2312{
2313	struct lbuf *bp;
2314
2315	do {
2316		spin_lock_irq(&log_redrive_lock);
2317		while ((bp = log_redrive_list)) {
2318			log_redrive_list = bp->l_redrive_next;
2319			bp->l_redrive_next = NULL;
2320			spin_unlock_irq(&log_redrive_lock);
2321			lbmStartIO(bp);
2322			spin_lock_irq(&log_redrive_lock);
2323		}
2324
2325		if (freezing(current)) {
2326			spin_unlock_irq(&log_redrive_lock);
2327			try_to_freeze();
2328		} else {
2329			set_current_state(TASK_INTERRUPTIBLE);
2330			spin_unlock_irq(&log_redrive_lock);
2331			schedule();
2332		}
2333	} while (!kthread_should_stop());
2334
2335	jfs_info("jfsIOWait being killed!");
2336	return 0;
2337}
2338
2339/*
2340 * NAME:	lmLogFormat()/jfs_logform()
2341 *
2342 * FUNCTION:	format file system log
2343 *
2344 * PARAMETERS:
2345 *	log	- volume log
2346 *	logAddress - start address of log space in FS block
2347 *	logSize	- length of log space in FS block;
2348 *
2349 * RETURN:	0	- success
2350 *		-EIO	- i/o error
2351 *
2352 * XXX: We're synchronously writing one page at a time.  This needs to
2353 *	be improved by writing multiple pages at once.
2354 */
2355int lmLogFormat(struct jfs_log *log, s64 logAddress, int logSize)
2356{
2357	int rc = -EIO;
2358	struct jfs_sb_info *sbi;
2359	struct logsuper *logsuper;
2360	struct logpage *lp;
2361	int lspn;		/* log sequence page number */
2362	struct lrd *lrd_ptr;
2363	int npages = 0;
2364	struct lbuf *bp;
2365
2366	jfs_info("lmLogFormat: logAddress:%Ld logSize:%d",
2367		 (long long)logAddress, logSize);
2368
2369	sbi = list_entry(log->sb_list.next, struct jfs_sb_info, log_list);
2370
2371	/* allocate a log buffer */
2372	bp = lbmAllocate(log, 1);
2373
2374	npages = logSize >> sbi->l2nbperpage;
2375
2376	/*
2377	 *	log space:
2378	 *
2379	 * page 0 - reserved;
2380	 * page 1 - log superblock;
2381	 * page 2 - log data page: A SYNC log record is written
2382	 *	    into this page at logform time;
2383	 * pages 3-N - log data page: set to empty log data pages;
2384	 */
2385	/*
2386	 *	init log superblock: log page 1
2387	 */
2388	logsuper = (struct logsuper *) bp->l_ldata;
2389
2390	logsuper->magic = cpu_to_le32(LOGMAGIC);
2391	logsuper->version = cpu_to_le32(LOGVERSION);
2392	logsuper->state = cpu_to_le32(LOGREDONE);
2393	logsuper->flag = cpu_to_le32(sbi->mntflag);	/* ? */
2394	logsuper->size = cpu_to_le32(npages);
2395	logsuper->bsize = cpu_to_le32(sbi->bsize);
2396	logsuper->l2bsize = cpu_to_le32(sbi->l2bsize);
2397	logsuper->end = cpu_to_le32(2 * LOGPSIZE + LOGPHDRSIZE + LOGRDSIZE);
2398
2399	bp->l_flag = lbmWRITE | lbmSYNC | lbmDIRECT;
2400	bp->l_blkno = logAddress + sbi->nbperpage;
2401	lbmStartIO(bp);
2402	if ((rc = lbmIOWait(bp, 0)))
2403		goto exit;
2404
2405	/*
2406	 *	init pages 2 to npages-1 as log data pages:
2407	 *
2408	 * log page sequence number (lpsn) initialization:
2409	 *
2410	 * pn:   0     1     2     3                 n-1
2411	 *       +-----+-----+=====+=====+===.....===+=====+
2412	 * lspn:             N-1   0     1           N-2
2413	 *                   <--- N page circular file ---->
2414	 *
2415	 * the N (= npages-2) data pages of the log is maintained as
2416	 * a circular file for the log records;
2417	 * lpsn grows by 1 monotonically as each log page is written
2418	 * to the circular file of the log;
2419	 * and setLogpage() will not reset the page number even if
2420	 * the eor is equal to LOGPHDRSIZE. In order for binary search
2421	 * still work in find log end process, we have to simulate the
2422	 * log wrap situation at the log format time.
2423	 * The 1st log page written will have the highest lpsn. Then
2424	 * the succeeding log pages will have ascending order of
2425	 * the lspn starting from 0, ... (N-2)
2426	 */
2427	lp = (struct logpage *) bp->l_ldata;
2428	/*
2429	 * initialize 1st log page to be written: lpsn = N - 1,
2430	 * write a SYNCPT log record is written to this page
2431	 */
2432	lp->h.page = lp->t.page = cpu_to_le32(npages - 3);
2433	lp->h.eor = lp->t.eor = cpu_to_le16(LOGPHDRSIZE + LOGRDSIZE);
2434
2435	lrd_ptr = (struct lrd *) &lp->data;
2436	lrd_ptr->logtid = 0;
2437	lrd_ptr->backchain = 0;
2438	lrd_ptr->type = cpu_to_le16(LOG_SYNCPT);
2439	lrd_ptr->length = 0;
2440	lrd_ptr->log.syncpt.sync = 0;
2441
2442	bp->l_blkno += sbi->nbperpage;
2443	bp->l_flag = lbmWRITE | lbmSYNC | lbmDIRECT;
2444	lbmStartIO(bp);
2445	if ((rc = lbmIOWait(bp, 0)))
2446		goto exit;
2447
2448	/*
2449	 *	initialize succeeding log pages: lpsn = 0, 1, ..., (N-2)
2450	 */
2451	for (lspn = 0; lspn < npages - 3; lspn++) {
2452		lp->h.page = lp->t.page = cpu_to_le32(lspn);
2453		lp->h.eor = lp->t.eor = cpu_to_le16(LOGPHDRSIZE);
2454
2455		bp->l_blkno += sbi->nbperpage;
2456		bp->l_flag = lbmWRITE | lbmSYNC | lbmDIRECT;
2457		lbmStartIO(bp);
2458		if ((rc = lbmIOWait(bp, 0)))
2459			goto exit;
2460	}
2461
2462	rc = 0;
2463exit:
2464	/*
2465	 *	finalize log
2466	 */
2467	/* release the buffer */
2468	lbmFree(bp);
2469
2470	return rc;
2471}
2472
2473#ifdef CONFIG_JFS_STATISTICS
2474int jfs_lmstats_proc_show(struct seq_file *m, void *v)
2475{
2476	seq_printf(m,
2477		       "JFS Logmgr stats\n"
2478		       "================\n"
2479		       "commits = %d\n"
2480		       "writes submitted = %d\n"
2481		       "writes completed = %d\n"
2482		       "full pages submitted = %d\n"
2483		       "partial pages submitted = %d\n",
2484		       lmStat.commit,
2485		       lmStat.submitted,
2486		       lmStat.pagedone,
2487		       lmStat.full_page,
2488		       lmStat.partial_page);
2489	return 0;
2490}
2491#endif /* CONFIG_JFS_STATISTICS */
2492