1/* SPDX-License-Identifier: GPL-2.0-or-later */
2/*
3 *   Copyright (C) International Business Machines Corp., 2000-2004
4 *   Portions Copyright (C) Christoph Hellwig, 2001-2002
5 */
6#ifndef	_H_JFS_LOGMGR
7#define _H_JFS_LOGMGR
8
9#include <linux/uuid.h>
10
11#include "jfs_filsys.h"
12#include "jfs_lock.h"
13
14/*
15 *	log manager configuration parameters
16 */
17
/*
 * log page size: LOGPSIZE is the size of one log page in bytes and
 * L2LOGPSIZE is its base-2 logarithm (1 << 12 == 4096).  The two values
 * describe the same quantity and must be changed together.
 */
#define	LOGPSIZE	4096
#define	L2LOGPSIZE	12

#define LOGPAGES	16	/* Log pages per mounted file system */
23
24/*
25 *	log logical volume
26 *
27 * a log is used to make the commit operation on journalled
28 * files within the same logical volume group atomic.
29 * a log is implemented with a logical volume.
30 * there is one log per logical volume group.
31 *
32 * block 0 of the log logical volume is not used (ipl etc).
33 * block 1 contains a log "superblock" and is used by logFormat(),
34 * lmLogInit(), lmLogShutdown(), and logRedo() to record status
35 * of the log but is not otherwise used during normal processing.
36 * blocks 2 - (N-1) are used to contain log records.
37 *
38 * when a volume group is varied-on-line, logRedo() must have
39 * been executed before the file systems (logical volumes) in
40 * the volume group can be mounted.
41 */
/*
 *	log superblock (block 1 of logical volume)
 *
 * Block numbers below are block numbers within the log logical volume.
 */
#define	LOGSUPER_B	1	/* block holding the log superblock */
#define	LOGSTART_B	2	/* first block used for log records */

/* NOTE(review): presumably the expected logsuper.magic / .version values */
#define	LOGMAGIC	0x87654321
#define	LOGVERSION	1

#define MAX_ACTIVE	128	/* Max active file systems sharing log */
52
/*
 * Layout of the log superblock.  Integer fields are little-endian
 * (__le32).  The number that starts each field comment is the size of
 * the field in bytes.
 */
struct logsuper {
	__le32 magic;		/* 4: log lv identifier */
	__le32 version;		/* 4: version number */
	__le32 serial;		/* 4: log open/mount counter */
	__le32 size;		/* 4: size in number of LOGPSIZE blocks */
	__le32 bsize;		/* 4: logical block size in bytes */
	__le32 l2bsize;		/* 4: log2 of bsize */

	__le32 flag;		/* 4: commit option (see jfs_filsys.h) */
	__le32 state;		/* 4: log state (LOGMOUNT, LOGREDONE, ...) */

	__le32 end;		/* 4: addr of last log record set by logredo */
	uuid_t uuid;		/* 16: 128-bit journal uuid */
	char label[16];		/* 16: journal label */
	struct {
		uuid_t uuid;
	} active[MAX_ACTIVE];	/* 2048: active file systems list */
};
71
/* log flag: commit option (see jfs_filsys.h) */

/* log state: values recorded in the state field of struct logsuper */
#define	LOGMOUNT	0	/* log mounted by lmLogInit() */
#define LOGREDONE	1	/* log shutdown by lmLogShutdown().
				 * log redo completed by logredo().
				 */
#define LOGWRAP		2	/* log wrapped */
#define LOGREADERR	3	/* log read error detected in logredo() */
81
82
83/*
84 *	log logical page
85 *
86 * (this comment should be rewritten !)
87 * the header and trailer structures (h,t) will normally have
88 * the same page and eor value.
 * An exception to this occurs when a complete page write is not
 * accomplished because of a power failure. Since the hardware may "split
 * write" sectors in the page, any out-of-order sequence may occur during a
 * power failure and needs to be recognized during log replay.  The xor value
 * is an "exclusive or" of all log words in the page up to eor.  This
 * 32 bit xor is stored with the top 16 bits in the header and the
 * bottom 16 bits in the trailer.  logredo can easily recognize pages
 * that were not completed by reconstructing this xor and checking
 * the log page.
98 *
99 * Previous versions of the operating system did not allow split
100 * writes and detected partially written records in logredo by
101 * ordering the updates to the header, trailer, and the move of data
102 * into the logdata area.  The order: (1) data is moved (2) header
103 * is updated (3) trailer is updated.  In logredo, when the header
104 * differed from the trailer, the header and trailer were reconciled
105 * as follows: if h.page != t.page they were set to the smaller of
106 * the two and h.eor and t.eor set to 8 (i.e. empty page). if (only)
107 * h.eor != t.eor they were set to the smaller of their two values.
108 */
/*
 * One log page of LOGPSIZE bytes: an 8-byte header, the log record area
 * and an 8-byte trailer.  All fields are little-endian.
 */
struct logpage {
	struct {		/* header */
		__le32 page;	/* 4: log sequence page number */
		__le16 rsrvd;	/* 2: */
		__le16 eor;	/* 2: end-of-log offset of last record write */
	} h;

	/*
	 * The page holds LOGPSIZE / 4 32-bit words; 4 of them are taken by
	 * the header and trailer (2 words each), the rest hold log records.
	 */
	__le32 data[LOGPSIZE / 4 - 4];	/* log record area */

	struct {		/* trailer */
		__le32 page;	/* 4: normally the same as h.page */
		__le16 rsrvd;	/* 2: */
		__le16 eor;	/* 2: normally the same as h.eor */
	} t;
};
124
/* sizes in bytes of the h (header) and t (trailer) members of struct logpage */
#define LOGPHDRSIZE	8	/* log page header size */
#define LOGPTLRSIZE	8	/* log page trailer size */
127
128
129/*
130 *	log record
131 *
132 * (this comment should be rewritten !)
133 * jfs uses only "after" log records (only a single writer is allowed
134 * in a page, pages are written to temporary paging space if
135 * they must be written to disk before commit, and i/o is
136 * scheduled for modified pages to their home location after
137 * the log records containing the after values and the commit
138 * record is written to the log on disk, undo discards the copy
139 * in main-memory.)
140 *
141 * a log record consists of a data area of variable length followed by
142 * a descriptor of fixed size LOGRDSIZE bytes.
 * the data area is rounded up to an integral multiple of 4 bytes and
 * must be no longer than LOGPSIZE.
 * the size of the descriptor is a multiple of 4 bytes and the descriptor
 * is aligned on a 4-byte boundary.
147 * records are packed one after the other in the data area of log pages.
148 * (sometimes a DUMMY record is inserted so that at least one record ends
149 * on every page or the longest record is placed on at most two pages).
150 * the field eor in page header/trailer points to the byte following
151 * the last record on a page.
152 */
153
/*
 * log record types (type field of struct lrd).
 * Each value is a distinct single bit.
 */
#define LOG_COMMIT		0x8000
#define LOG_SYNCPT		0x4000
#define LOG_MOUNT		0x2000
#define LOG_REDOPAGE		0x0800
#define LOG_NOREDOPAGE		0x0080
#define LOG_NOREDOINOEXT	0x0040
#define LOG_UPDATEMAP		0x0008
#define LOG_NOREDOFILE		0x0001

/*
 * REDOPAGE/NOREDOPAGE log record data type
 * (type field of the redopage / noredopage members of struct lrd).
 * Each value is a distinct single bit.
 */
#define	LOG_INODE		0x0001
#define	LOG_XTREE		0x0002
#define	LOG_DTREE		0x0004
#define	LOG_BTROOT		0x0010
#define	LOG_EA			0x0020
#define	LOG_ACL			0x0040
#define	LOG_DATA		0x0080
#define	LOG_NEW			0x0100
#define	LOG_EXTEND		0x0200
#define LOG_RELOCATE		0x0400
#define LOG_DIR_XTREE		0x0800	/* Xtree is in directory inode */

/*
 * UPDATEMAP log record descriptor type
 * (type field of the updatemap member of struct lrd).
 * Each value is a distinct single bit.
 */
#define	LOG_ALLOCXADLIST	0x0080
#define	LOG_ALLOCPXDLIST	0x0040
#define	LOG_ALLOCXAD		0x0020
#define	LOG_ALLOCPXD		0x0010
#define	LOG_FREEXADLIST		0x0008
#define	LOG_FREEPXDLIST		0x0004
#define	LOG_FREEXAD		0x0002
#define	LOG_FREEPXD		0x0001
186
187
/*
 * Log record descriptor: a 16-byte type independent part followed by a
 * type dependent union.  All integer fields are little-endian.  The number
 * that starts each field comment is the size of the field in bytes; the
 * numbers in parentheses are running/total sizes.
 */
struct lrd {
	/*
	 * type independent area
	 */
	__le32 logtid;		/* 4: log transaction identifier */
	__le32 backchain;	/* 4: ptr to prev record of same transaction */
	__le16 type;		/* 2: record type (LOG_COMMIT, ...) */
	__le16 length;		/* 2: length of data in record (in bytes) */
	__le32 aggregate;	/* 4: file system lv/aggregate */
	/* (16) */

	/*
	 * type dependent area (20)
	 */
	union {

		/*
		 *	COMMIT: commit
		 *
		 * transaction commit: no type-dependent information;
		 */

		/*
		 *	REDOPAGE: after-image
		 *
		 * apply after-image;
		 *
		 * N.B. REDOPAGE, NOREDOPAGE, and UPDATEMAP must be same format;
		 */
		struct {
			__le32 fileset;	/* 4: fileset number */
			__le32 inode;	/* 4: inode number */
			__le16 type;	/* 2: REDOPAGE record type */
			__le16 l2linesize;	/* 2: log2 of line size */
			pxd_t pxd;	/* 8: on-disk page pxd */
		} redopage;	/* (20) */

		/*
		 *	NOREDOPAGE: the page is freed
		 *
		 * do not apply after-image records which precede this record
		 * in the log with the same page block number to this page.
		 *
		 * N.B. REDOPAGE, NOREDOPAGE, and UPDATEMAP must be same format;
		 */
		struct {
			__le32 fileset;	/* 4: fileset number */
			__le32 inode;	/* 4: inode number */
			__le16 type;	/* 2: NOREDOPAGE record type */
			__le16 rsrvd;	/* 2: reserved */
			pxd_t pxd;	/* 8: on-disk page pxd */
		} noredopage;	/* (20) */

		/*
		 *	UPDATEMAP: update block allocation map
		 *
		 * either in-line PXD,
		 * or     out-of-line  XADLIST;
		 *
		 * N.B. REDOPAGE, NOREDOPAGE, and UPDATEMAP must be same format;
		 */
		struct {
			__le32 fileset;	/* 4: fileset number */
			__le32 inode;	/* 4: inode number */
			__le16 type;	/* 2: UPDATEMAP record type */
			__le16 nxd;	/* 2: number of extents */
			pxd_t pxd;	/* 8: pxd */
		} updatemap;	/* (20) */

		/*
		 *	NOREDOINOEXT: the inode extent is freed
		 *
		 * do not apply after-image records which precede this
		 * record in the log with any of the 4 page block
		 * numbers in this inode extent.
		 *
		 * NOTE: The fileset and pxd fields MUST remain at the
		 *       same positions as in the REDOPAGE record format
		 *       (fileset first, pxd after 12 bytes).
		 *
		 */
		struct {
			__le32 fileset;	/* 4: fileset number */
			__le32 iagnum;	/* 4: IAG number     */
			__le32 inoext_idx;	/* 4: inode extent index */
			pxd_t pxd;	/* 8: on-disk page pxd */
		} noredoinoext;	/* (20) */

		/*
		 *	SYNCPT: log sync point
		 *
		 * replay log up to syncpt address specified;
		 */
		struct {
			__le32 sync;	/* 4: syncpt address (0 = here) */
		} syncpt;

		/*
		 *	MOUNT: file system mount
		 *
		 * file system mount: no type-dependent information;
		 */

		/*
		 *	? FREEXTENT: free specified extent(s)
		 *
		 * free specified extent(s) from block allocation map
		 * N.B.: nextents should be length of data/sizeof(xad_t)
		 */
		struct {
			__le32 type;	/* 4: FREEXTENT record type */
			__le32 nextent;	/* 4: number of extents */

			/* data: PXD or XAD list */
		} freextent;

		/*
		 *	? NOREDOFILE: this file is freed
		 *
		 * do not apply records which precede this record in the log
		 * with the same inode number.
		 *
		 * NOREDOFILE must be the first to be written at commit
		 * (last to be read in logredo()) - it prevents
		 * replay of preceding updates of all preceding generations
		 * of the inumber esp. the on-disk inode itself.
		 */
		struct {
			__le32 fileset;	/* 4: fileset number */
			__le32 inode;	/* 4: inode number */
		} noredofile;

		/*
		 *	? NEWPAGE:
		 *
		 * metadata type dependent
		 */
		struct {
			__le32 fileset;	/* 4: fileset number */
			__le32 inode;	/* 4: inode number */
			__le32 type;	/* 4: NEWPAGE record type */
			pxd_t pxd;	/* 8: on-disk page pxd */
		} newpage;

		/*
		 *	? DUMMY: filler
		 *
		 * no type-dependent information
		 */
	} log;
};					/* (36) */
338
/* size in bytes of the fixed-size log record descriptor (struct lrd) */
#define	LOGRDSIZE	(sizeof(struct lrd))
340
/*
 *	line vector descriptor
 *
 * A pair of 16-bit little-endian values giving an offset and a length.
 * NOTE(review): the unit of both fields is not stated in this header
 * (presumably "lines", cf. l2linesize in struct lrd) - confirm against
 * the code that fills them in.
 */
struct lvd {
	__le16 offset;
	__le16 length;
};
348
349
/*
 *	log logical volume
 *
 * Run-time descriptor of one log: it holds kernel objects (lists, wait
 * queues, a mutex, spinlocks) next to the log position and commit state.
 */
struct jfs_log {

	struct list_head sb_list;/*  This is used to sync metadata
				 *    before writing syncpt.
				 */
	struct list_head journal_list; /* Global list */
	struct file *bdev_file;	/* log lv pointer */
	int serial;		/* log mount serial number */

	s64 base;		/* log extent address (inline log) */
	int size;		/* log size in log pages */
	int l2bsize;		/* log2 of bsize */

	unsigned long flag;	/* flag; presumably the log_* values - confirm */

	struct lbuf *lbuf_free;	/* free lbufs */
	wait_queue_head_t free_wait;	/* presumably: wait for a free lbuf */

	/* log write */
	int logtid;		/* log tid */
	int page;		/* page number of eol page */
	int eor;		/* eor of last record in eol page */
	struct lbuf *bp;	/* current log page buffer */

	struct mutex loglock;	/* log write serialization lock */

	/* syncpt */
	int nextsync;		/* bytes to write before next syncpt */
	int active;		/* (undocumented) */
	wait_queue_head_t syncwait;	/* (undocumented) */

	/* commit */
	uint cflag;		/* presumably the logGC_* flags - confirm */
	struct list_head cqueue; /* FIFO commit queue */
	struct tblock *flush_tblk; /* tblk we're waiting on for flush */
	int gcrtc;		/* GC_READY transaction count */
	struct tblock *gclrt;	/* latest GC_READY transaction */
	spinlock_t gclock;	/* group commit lock */
	int logsize;		/* log data area size in bytes */
	int lsn;		/* end-of-log */
	int clsn;		/* clsn */
	int syncpt;		/* addr of last syncpt record */
	int sync;		/* addr from last logsync() */
	struct list_head synclist;	/* logsynclist anchor */
	spinlock_t synclock;	/* synclist lock */
	struct lbuf *wqueue;	/* log pageout queue */
	int count;		/* count */
	uuid_t uuid;		/* 128-bit uuid of log device */

	int no_integrity;	/* flag to disable journaling to disk */
};
404
/*
 * Log flag
 *
 * NOTE(review): the values are consecutive integers rather than bit masks;
 * presumably they are bit numbers within the flag member of struct jfs_log -
 * confirm against the users of these constants.
 */
#define log_INLINELOG	1
#define log_SYNCBARRIER	2
#define log_QUIESCE	3
#define log_FLUSH	4
412
/*
 * group commit flag
 */
/* jfs_log (NOTE(review): presumably kept in the cflag member - confirm) */
#define logGC_PAGEOUT	0x00000001

/* tblock/lbuf: each value is a distinct single bit */
#define tblkGC_QUEUE		0x0001
#define tblkGC_READY		0x0002
#define tblkGC_COMMIT		0x0004
#define tblkGC_COMMITTED	0x0008
#define tblkGC_EOP		0x0010
#define tblkGC_FREE		0x0020
#define tblkGC_LEADER		0x0040
#define tblkGC_ERROR		0x0080
#define tblkGC_LAZY		0x0100	/* D230860 */
#define tblkGC_UNLOCKED		0x0200	/* D230860 */
430
/*
 *		log cache buffer header
 *
 * Describes one buffered log page: the log it belongs to, its queue links,
 * its position in the log and the memory that backs it.
 */
struct lbuf {
	struct jfs_log *l_log;	/* log associated with buffer */

	/*
	 * data buffer base area
	 */
	uint l_flag;		/* pageout control flags */

	struct lbuf *l_wqnext;	/* write queue link */
	struct lbuf *l_freelist;	/* freelist link (also l_redrive_next) */

	int l_pn;		/* log page number */
	int l_eor;		/* log record eor */
	int l_ceor;		/* committed log record eor */

	s64 l_blkno;		/* log page block number */
	caddr_t l_ldata;	/* data page */
	struct page *l_page;	/* The page itself */
	uint l_offset;		/* Offset of l_ldata within the page */

	wait_queue_head_t l_ioevent;	/* i/o done event */
};
456
/*
 * Reuse l_freelist for redrive list: l_redrive_next is an alias for the
 * l_freelist member of struct lbuf, so both names access the same pointer.
 */
#define l_redrive_next l_freelist
459
/*
 *	logsynclist block
 *
 * common logsyncblk prefix for jbuf_t and tblock
 *
 * Because this is a shared prefix, the order and types of the members
 * below must stay in step with the structures that embed it.
 */
struct logsyncblk {
	u16 xflag;		/* flags */
	u16 flag;		/* only meaningful in tblock */
	lid_t lid;		/* lock id */
	s32 lsn;		/* log sequence number */
	struct list_head synclist;	/* log sync list link */
};
472
/*
 *	logsynclist serialization (per log)
 *
 * Thin wrappers around the spinlock API, all operating on the synclock
 * member of the log passed in.  LOGSYNC_LOCK() and LOGSYNC_UNLOCK() use
 * the irqsave/irqrestore variants; flags is the caller's variable that is
 * handed through to them.
 */

#define LOGSYNC_LOCK_INIT(log) spin_lock_init(&(log)->synclock)
#define LOGSYNC_LOCK(log, flags) spin_lock_irqsave(&(log)->synclock, flags)
#define LOGSYNC_UNLOCK(log, flags) \
	spin_unlock_irqrestore(&(log)->synclock, flags)
481
/*
 * logdiff - compute the difference in bytes of lsn from sync point
 * @diff: signed integer lvalue that receives the result; it is evaluated
 *	  more than once, so it must be free of side effects
 * @lsn:  log sequence number to measure (evaluated once)
 * @log:  pointer to a structure with syncpt and logsize members, such as
 *	  struct jfs_log (evaluated up to twice)
 *
 * Stores lsn - log->syncpt in diff; if that difference is negative,
 * log->logsize is added to it.
 *
 * The body is wrapped in do { } while (0) so that the macro expands to a
 * single statement and can be used safely in an unbraced if/else; invoke
 * it with a trailing semicolon like a function call.
 */
#define logdiff(diff, lsn, log)\
do {\
	(diff) = (lsn) - (log)->syncpt;\
	if ((diff) < 0)\
		(diff) += (log)->logsize;\
} while (0)
489
490extern int lmLogOpen(struct super_block *sb);
491extern int lmLogClose(struct super_block *sb);
492extern int lmLogShutdown(struct jfs_log * log);
493extern int lmLogInit(struct jfs_log * log);
494extern int lmLogFormat(struct jfs_log *log, s64 logAddress, int logSize);
495extern int lmGroupCommit(struct jfs_log *, struct tblock *);
496extern int jfsIOWait(void *);
497extern void jfs_flush_journal(struct jfs_log * log, int wait);
498extern void jfs_syncpt(struct jfs_log *log, int hard_sync);
499
500#endif				/* _H_JFS_LOGMGR */
501