• Home
  • History
  • Annotate
  • Line#
  • Navigate
  • Raw
  • Download
  • only in /asuswrt-rt-n18u-9.0.0.4.380.2695/release/src-rt-6.x.4708/linux/linux-2.6.36/fs/logfs/
/*
 * fs/logfs/logfs_abi.h
 *
 * As should be obvious for Linux kernel code, license is GPLv2
 *
 * Copyright (c) 2005-2008 Joern Engel <joern@logfs.org>
 *
 * Public header for logfs.
 */
10#ifndef FS_LOGFS_LOGFS_ABI_H
11#define FS_LOGFS_LOGFS_ABI_H
12
/*
 * For out-of-kernel compiles: if the includer provides no BUILD_BUG_ON,
 * fall back to a definition that expands to nothing.  The size checks
 * below then still compile, but verify nothing.
 */
#ifndef BUILD_BUG_ON
#define BUILD_BUG_ON(condition) /**/
#endif

/*
 * SIZE_CHECK(type, size) - pin the size of an on-medium structure
 *
 * Expands to a static inline function check_<type>() whose only statement
 * is BUILD_BUG_ON(sizeof(struct <type>) != size).  Nothing in this header
 * calls the function; it only gives BUILD_BUG_ON a function body to live in.
 */
#define SIZE_CHECK(type, size)					\
static inline void check_##type(void)				\
{								\
	BUILD_BUG_ON(sizeof(struct type) != (size));		\
}

/*
 * Throughout the logfs code, we're constantly dealing with blocks at
 * various positions or offsets.  To remove confusion, we strictly
 * distinguish between a "position" - the logical position within a
 * file and an "offset" - the physical location within the device.
 *
 * Any usage of the term offset for a logical location or position for
 * a physical one is a bug and should get fixed.
 */

/*
 * Blocks are allocated in one of several segments depending on their
 * level.  The following levels are used:
 *  0	- regular data block
 *  1	- i1 indirect blocks
 *  2	- i2 indirect blocks
 *  3	- i3 indirect blocks
 *  4	- i4 indirect blocks
 *  5	- i5 indirect blocks
 *  6	- ifile data blocks
 *  7	- ifile i1 indirect blocks
 *  8	- ifile i2 indirect blocks
 *  9	- ifile i3 indirect blocks
 * 10	- ifile i4 indirect blocks
 * 11	- ifile i5 indirect blocks
 * Potential levels to be used in the future:
 * 12	- gc recycled blocks, long-lived data
 * 13	- replacement blocks, short-lived data
 *
 * Levels 1-11 are necessary for robust gc operations and help separate
 * short-lived metadata from longer-lived file data.  In the future,
 * file data should get separated into several segments based on simple
 * heuristics.  Old data recycled during gc operation is expected to be
 * long-lived.  New data is of uncertain life expectancy.  New data
 * used to replace older blocks in existing files is expected to be
 * short-lived.
 */


/*
 * Magic numbers.
 * LOGFS_MAGIC		- 64bit, for the superblock
 * LOGFS_MAGIC_U32	- 32bit, for statfs f_type
 */
#define LOGFS_MAGIC		0x7a3a8e5cb9d5bf67ull
#define LOGFS_MAGIC_U32		0xc97e8168u

/*
 * Various blocksize related macros.  Blocksize is currently fixed at 4KiB.
 * Sooner or later that should become configurable and the macros replaced
 * by something superblock-dependent.  Pointers in indirect blocks are and
 * will remain 64bit.
 *
 * LOGFS_BLOCKSIZE	- size of one block in bytes
 * LOGFS_BLOCK_FACTOR	- number of pointers per indirect block
 * LOGFS_BLOCK_BITS	- log2 of LOGFS_BLOCK_FACTOR, used for shifts
 *
 * The pointer size is taken from __be64, the type this header uses for
 * on-medium pointers, so these macros do not depend on the kernel-internal
 * u64 type.
 */
#define LOGFS_BLOCKSIZE		(4096ull)
#define LOGFS_BLOCK_FACTOR	(LOGFS_BLOCKSIZE / sizeof(__be64))
#define LOGFS_BLOCK_BITS	(9)

/*
 * Number of blocks at various levels of indirection.  There are 16 direct
 * block pointers plus a single indirect pointer.  Each further level
 * multiplies the block count by LOGFS_BLOCK_FACTOR.
 */
#define I0_BLOCKS		(16)
#define I1_BLOCKS		LOGFS_BLOCK_FACTOR
#define I2_BLOCKS		(LOGFS_BLOCK_FACTOR * I1_BLOCKS)
#define I3_BLOCKS		(LOGFS_BLOCK_FACTOR * I2_BLOCKS)
#define I4_BLOCKS		(LOGFS_BLOCK_FACTOR * I3_BLOCKS)
#define I5_BLOCKS		(LOGFS_BLOCK_FACTOR * I4_BLOCKS)

/*
 * INDIRECT_INDEX	- index of the indirect pointer, right after the
 *			  I0_BLOCKS direct pointers
 * LOGFS_EMBEDDED_FIELDS - total number of pointer slots (direct + indirect)
 */
#define INDIRECT_INDEX		I0_BLOCKS
#define LOGFS_EMBEDDED_FIELDS	(I0_BLOCKS + 1)

/*
 * Sizes at which files require another level of indirection.  Files smaller
 * than LOGFS_EMBEDDED_SIZE can be completely stored in the inode itself,
 * similar to ext2 fast symlinks.
 *
 * Data at a position smaller than LOGFS_I0_SIZE is accessed through the
 * direct pointers, else through the 1x indirect pointer and so forth.
 */
#define LOGFS_EMBEDDED_SIZE	(LOGFS_EMBEDDED_FIELDS * sizeof(__be64))
#define LOGFS_I0_SIZE		(I0_BLOCKS * LOGFS_BLOCKSIZE)
#define LOGFS_I1_SIZE		(I1_BLOCKS * LOGFS_BLOCKSIZE)
#define LOGFS_I2_SIZE		(I2_BLOCKS * LOGFS_BLOCKSIZE)
#define LOGFS_I3_SIZE		(I3_BLOCKS * LOGFS_BLOCKSIZE)
#define LOGFS_I4_SIZE		(I4_BLOCKS * LOGFS_BLOCKSIZE)
#define LOGFS_I5_SIZE		(I5_BLOCKS * LOGFS_BLOCKSIZE)

/*
 * Each indirect block pointer must have this flag set, if all block pointers
 * behind it are set, i.e. there is no hole hidden in the shadow of this
 * indirect block pointer.
 *
 * pure_ofs() clears the flag and leaves the remaining 63 bits untouched.
 * The argument is parenthesized so that a compound expression such as
 * pure_ofs(a | b) is masked as a whole rather than only its last operand.
 */
#define LOGFS_FULLY_POPULATED (1ULL << 63)
#define pure_ofs(ofs) ((ofs) & ~LOGFS_FULLY_POPULATED)

/*
 * LogFS needs to separate data into levels.  Each level is defined as the
 * maximal possible distance from the master inode (inode of the inode file).
 * Data blocks reside on level 0, 1x indirect block on level 1, etc.
 * Inodes reside on level 6, indirect blocks for the inode file on levels 7-11.
 * This effort is necessary to guarantee garbage collection to always make
 * progress.
 *
 * LOGFS_MAX_INDIRECT is the maximal indirection through indirect blocks,
 * LOGFS_MAX_LEVELS is one more for the actual data level of a file.  It is
 * the maximal number of levels for one file.
 * LOGFS_NO_AREAS is twice that, as the inode file and regular files are
 * effectively stacked on top of each other.
 */
#define LOGFS_MAX_INDIRECT	(5)
#define LOGFS_MAX_LEVELS	(LOGFS_MAX_INDIRECT + 1)
#define LOGFS_NO_AREAS		(2 * LOGFS_MAX_LEVELS)

/* Maximum size of filenames */
#define LOGFS_MAX_NAMELEN	(255)

/* Number of segments in the primary journal. */
#define LOGFS_JOURNAL_SEGS	(16)

/* Maximum number of free/erased/etc. segments in journal entries */
#define MAX_CACHED_SEGS		(64)


/*
 * LOGFS_OBJECT_HEADERSIZE is the size of a single header in the object store,
 * LOGFS_SEGMENT_HEADERSIZE is the size of the header of a segment,
 * LOGFS_MAX_OBJECTSIZE the size of the largest possible object, including
 * its header,
 * LOGFS_SEGMENT_RESERVE is the amount of space reserved for each segment for
 * its segment header and the padded space at the end when no further objects
 * fit.
 */
#define LOGFS_OBJECT_HEADERSIZE	(0x1c)
#define LOGFS_SEGMENT_HEADERSIZE (0x18)
#define LOGFS_MAX_OBJECTSIZE	(LOGFS_OBJECT_HEADERSIZE + LOGFS_BLOCKSIZE)
#define LOGFS_SEGMENT_RESERVE	\
	(LOGFS_SEGMENT_HEADERSIZE + LOGFS_MAX_OBJECTSIZE - 1)

/*
 * Segment types, stored in the type field of struct logfs_segment_header:
 * SEG_SUPER	- superblock segment
 * SEG_JOURNAL	- journal segment
 * SEG_OSTORE	- object store segment
 *
 * NOTE(review): descriptions follow the constant names; confirm against the
 * code that writes segment headers.
 */
enum {
	SEG_SUPER	= 0x01,
	SEG_JOURNAL	= 0x02,
	SEG_OSTORE	= 0x03,
};

173/**
174 * struct logfs_segment_header - per-segment header in the ostore
175 *
176 * @crc:			crc32 of header (there is no data)
177 * @pad:			unused, must be 0
178 * @type:			segment type, see above
179 * @level:			GC level for all objects in this segment
180 * @segno:			segment number
181 * @ec:				erase count for this segment
182 * @gec:			global erase count at time of writing
183 */
184struct logfs_segment_header {
185	__be32	crc;
186	__be16	pad;
187	__u8	type;
188	__u8	level;
189	__be32	segno;
190	__be32	ec;
191	__be64	gec;
192};
193
194SIZE_CHECK(logfs_segment_header, LOGFS_SEGMENT_HEADERSIZE);
195
/*
 * Feature masks, one per feature class.  All three are currently empty.
 * NOTE(review): presumably compared against the ds_feature_* fields of the
 * superblock - confirm against the mount code.
 */
#define LOGFS_FEATURES_INCOMPAT		(0ull)
#define LOGFS_FEATURES_RO_COMPAT	(0ull)
#define LOGFS_FEATURES_COMPAT		(0ull)

200/**
201 * struct logfs_disk_super - on-medium superblock
202 *
203 * @ds_magic:			magic number, must equal LOGFS_MAGIC
204 * @ds_crc:			crc32 of structure starting with the next field
205 * @ds_ifile_levels:		maximum number of levels for ifile
206 * @ds_iblock_levels:		maximum number of levels for regular files
207 * @ds_data_levels:		number of separate levels for data
208 * @pad0:			reserved, must be 0
209 * @ds_feature_incompat:	incompatible filesystem features
210 * @ds_feature_ro_compat:	read-only compatible filesystem features
211 * @ds_feature_compat:		compatible filesystem features
212 * @ds_flags:			flags
213 * @ds_segment_shift:		log2 of segment size
214 * @ds_block_shift:		log2 of block size
215 * @ds_write_shift:		log2 of write size
216 * @pad1:			reserved, must be 0
217 * @ds_journal_seg:		segments used by primary journal
218 * @ds_root_reserve:		bytes reserved for the superuser
219 * @ds_speed_reserve:		bytes reserved to speed up GC
220 * @ds_bad_seg_reserve:		number of segments reserved to handle bad blocks
221 * @pad2:			reserved, must be 0
222 * @pad3:			reserved, must be 0
223 *
224 * Contains only read-only fields.  Read-write fields like the amount of used
225 * space is tracked in the dynamic superblock, which is stored in the journal.
226 */
227struct logfs_disk_super {
228	struct logfs_segment_header ds_sh;
229	__be64	ds_magic;
230
231	__be32	ds_crc;
232	__u8	ds_ifile_levels;
233	__u8	ds_iblock_levels;
234	__u8	ds_data_levels;
235	__u8	ds_segment_shift;
236	__u8	ds_block_shift;
237	__u8	ds_write_shift;
238	__u8	pad0[6];
239
240	__be64	ds_filesystem_size;
241	__be32	ds_segment_size;
242	__be32  ds_bad_seg_reserve;
243
244	__be64	ds_feature_incompat;
245	__be64	ds_feature_ro_compat;
246
247	__be64	ds_feature_compat;
248	__be64	ds_feature_flags;
249
250	__be64	ds_root_reserve;
251	__be64  ds_speed_reserve;
252
253	__be32	ds_journal_seg[LOGFS_JOURNAL_SEGS];
254
255	__be64	ds_super_ofs[2];
256	__be64	pad3[8];
257};
258
259SIZE_CHECK(logfs_disk_super, 256);
260
/*
 * Object types, stored in the type field of struct logfs_object_header:
 * OBJ_BLOCK	- Data or indirect block
 * OBJ_INODE	- Inode
 * OBJ_DENTRY	- Dentry
 */
enum {
	OBJ_BLOCK	= 0x04,
	OBJ_INODE	= 0x05,
	OBJ_DENTRY	= 0x06,
};

273/**
274 * struct logfs_object_header - per-object header in the ostore
275 *
276 * @crc:			crc32 of header, excluding data_crc
277 * @len:			length of data
278 * @type:			object type, see above
279 * @compr:			compression type
280 * @ino:			inode number
281 * @bix:			block index
282 * @data_crc:			crc32 of payload
283 */
284struct logfs_object_header {
285	__be32	crc;
286	__be16	len;
287	__u8	type;
288	__u8	compr;
289	__be64	ino;
290	__be64	bix;
291	__be32	data_crc;
292} __attribute__((packed));
293
294SIZE_CHECK(logfs_object_header, LOGFS_OBJECT_HEADERSIZE);
295
/*
 * Reserved inode numbers:
 * LOGFS_INO_MAPPING	- not described in this header
 *			  (NOTE(review): purpose to be confirmed)
 * LOGFS_INO_MASTER	- master inode (for inode file)
 * LOGFS_INO_ROOT	- root directory
 * LOGFS_INO_SEGFILE	- per-segment used bytes and erase count
 * LOGFS_RESERVED_INOS	- presumably the number of reserved inode numbers,
 *			  i.e. the first one available for regular use
 *			  (confirm against the inode allocator)
 */
enum {
	LOGFS_INO_MAPPING	= 0x00,
	LOGFS_INO_MASTER	= 0x01,
	LOGFS_INO_ROOT		= 0x02,
	LOGFS_INO_SEGFILE	= 0x03,
	LOGFS_RESERVED_INOS	= 0x10,
};

/*
 * Inode flags.  High bits should never be written to the medium.  They are
 * reserved for in-memory usage.
 * Low bits should either remain in sync with the corresponding FS_*_FL or
 * reuse slots that obviously don't make sense for logfs.
 *
 * LOGFS_IF_COMPRESSED	Same value as FS_COMPR_FL
 * LOGFS_IF_DIRTY	Inode must be written back
 * LOGFS_IF_ZOMBIE	Inode has been deleted
 * LOGFS_IF_STILLBORN	-ENOSPC happened when creating inode
 */
#define LOGFS_IF_COMPRESSED	0x00000004 /* == FS_COMPR_FL */
#define LOGFS_IF_DIRTY		0x20000000
#define LOGFS_IF_ZOMBIE		0x40000000
#define LOGFS_IF_STILLBORN	0x80000000

/* Flags available to chattr */
#define LOGFS_FL_USER_VISIBLE	(LOGFS_IF_COMPRESSED)
#define LOGFS_FL_USER_MODIFIABLE (LOGFS_IF_COMPRESSED)
/* Flags inherited from parent directory on file/directory creation */
#define LOGFS_FL_INHERITED	(LOGFS_IF_COMPRESSED)

331/**
332 * struct logfs_disk_inode - on-medium inode
333 *
334 * @di_mode:			file mode
335 * @di_pad:			reserved, must be 0
336 * @di_flags:			inode flags, see above
337 * @di_uid:			user id
338 * @di_gid:			group id
339 * @di_ctime:			change time
340 * @di_mtime:			modify time
341 * @di_refcount:		reference count (aka nlink or link count)
342 * @di_generation:		inode generation, for nfs
343 * @di_used_bytes:		number of bytes used
344 * @di_size:			file size
345 * @di_data:			data pointers
346 */
347struct logfs_disk_inode {
348	__be16	di_mode;
349	__u8	di_height;
350	__u8	di_pad;
351	__be32	di_flags;
352	__be32	di_uid;
353	__be32	di_gid;
354
355	__be64	di_ctime;
356	__be64	di_mtime;
357
358	__be64	di_atime;
359	__be32	di_refcount;
360	__be32	di_generation;
361
362	__be64	di_used_bytes;
363	__be64	di_size;
364
365	__be64	di_data[LOGFS_EMBEDDED_FIELDS];
366};
367
368SIZE_CHECK(logfs_disk_inode, 200);
369
/*
 * Locations of selected members of struct logfs_disk_inode, counted in
 * 64bit words (byte offset divided by sizeof(__be64)) rather than bytes.
 *
 * INODE_POINTER_OFS	- first word of di_data
 * INODE_USED_OFS	- word holding di_used_bytes
 * INODE_SIZE_OFS	- word holding di_size
 * INODE_HEIGHT_OFS	- word 0, the word that contains di_height
 */
#define INODE_POINTER_OFS \
	(offsetof(struct logfs_disk_inode, di_data) / sizeof(__be64))
#define INODE_USED_OFS \
	(offsetof(struct logfs_disk_inode, di_used_bytes) / sizeof(__be64))
#define INODE_SIZE_OFS \
	(offsetof(struct logfs_disk_inode, di_size) / sizeof(__be64))
#define INODE_HEIGHT_OFS	(0)

378/**
379 * struct logfs_disk_dentry - on-medium dentry structure
380 *
381 * @ino:			inode number
382 * @namelen:			length of file name
383 * @type:			file type, identical to bits 12..15 of mode
384 * @name:			file name
385 */
386struct logfs_disk_dentry {
387	__be64	ino;
388	__be16	namelen;
389	__u8	type;
390	__u8	name[LOGFS_MAX_NAMELEN];
391} __attribute__((packed));
392
393SIZE_CHECK(logfs_disk_dentry, 266);
394
395#define RESERVED		0xffffffff
396#define BADSEG			0xffffffff
397/**
398 * struct logfs_segment_entry - segment file entry
399 *
400 * @ec_level:			erase count and level
401 * @valid:			number of valid bytes
402 *
403 * Segment file contains one entry for every segment.  ec_level contains the
404 * erasecount in the upper 28 bits and the level in the lower 4 bits.  An
405 * ec_level of BADSEG (-1) identifies bad segments.  valid contains the number
406 * of valid bytes or RESERVED (-1 again) if the segment is used for either the
407 * superblock or the journal, or when the segment is bad.
408 */
409struct logfs_segment_entry {
410	__be32	ec_level;
411	__be32	valid;
412};
413
414SIZE_CHECK(logfs_segment_entry, 8);
415
416/**
417 * struct logfs_journal_header - header for journal entries (JEs)
418 *
419 * @h_crc:			crc32 of journal entry
420 * @h_len:			length of compressed journal entry,
421 *				not including header
422 * @h_datalen:			length of uncompressed data
423 * @h_type:			JE type
424 * @h_compr:			compression type
425 * @h_pad:			reserved
426 */
427struct logfs_journal_header {
428	__be32	h_crc;
429	__be16	h_len;
430	__be16	h_datalen;
431	__be16	h_type;
432	__u8	h_compr;
433	__u8	h_pad[5];
434};
435
436SIZE_CHECK(logfs_journal_header, 16);
437
/*
 * Life expectancy of data.
 * VIM_DEFAULT		- default vim
 * VIM_SEGFILE		- for segment file only - very short-living
 *
 * A VIM_GC value (GC'd data - likely long-living) is not defined in this
 * enum.
 */
enum logfs_vim {
	VIM_DEFAULT	= 0,
	VIM_SEGFILE	= 1,
};

449/**
450 * struct logfs_je_area - wbuf header
451 *
452 * @segno:			segment number of area
453 * @used_bytes:			number of bytes already used
454 * @gc_level:			GC level
455 * @vim:			life expectancy of data
456 *
457 * "Areas" are segments currently being used for writing.  There is at least
458 * one area per GC level.  Several may be used to separate long-living from
459 * short-living data.  If an area with unknown vim is encountered, it can
460 * simply be closed.
461 * The write buffer immediately follow this header.
462 */
463struct logfs_je_area {
464	__be32	segno;
465	__be32	used_bytes;
466	__u8	gc_level;
467	__u8	vim;
468} __attribute__((packed));
469
470SIZE_CHECK(logfs_je_area, 10);
471
472#define MAX_JOURNAL_HEADER \
473	(sizeof(struct logfs_journal_header) + sizeof(struct logfs_je_area))
474
475/**
476 * struct logfs_je_dynsb - dynamic superblock
477 *
478 * @ds_gec:			global erase count
479 * @ds_sweeper:			current position of GC "sweeper"
480 * @ds_rename_dir:		source directory ino (see dir.c documentation)
481 * @ds_rename_pos:		position of source dd (see dir.c documentation)
482 * @ds_victim_ino:		victims of incomplete dir operation (see dir.c)
483 * @ds_victim_ino:		parent inode of victim (see dir.c)
484 * @ds_used_bytes:		number of used bytes
485 */
486struct logfs_je_dynsb {
487	__be64	ds_gec;
488	__be64	ds_sweeper;
489
490	__be64	ds_rename_dir;
491	__be64	ds_rename_pos;
492
493	__be64	ds_victim_ino;
494	__be64	ds_victim_parent;
495
496	__be64	ds_used_bytes;
497	__be32	ds_generation;
498	__be32	pad;
499};
500
501SIZE_CHECK(logfs_je_dynsb, 64);
502
503/**
504 * struct logfs_je_anchor - anchor of filesystem tree, aka master inode
505 *
506 * @da_size:			size of inode file
507 * @da_last_ino:		last created inode
508 * @da_used_bytes:		number of bytes used
509 * @da_data:			data pointers
510 */
511struct logfs_je_anchor {
512	__be64	da_size;
513	__be64	da_last_ino;
514
515	__be64	da_used_bytes;
516	u8	da_height;
517	u8	pad[7];
518
519	__be64	da_data[LOGFS_EMBEDDED_FIELDS];
520};
521
522SIZE_CHECK(logfs_je_anchor, 168);
523
524/**
525 * struct logfs_je_spillout - spillout entry (from 1st to 2nd journal)
526 *
527 * @so_segment:			segments used for 2nd journal
528 *
529 * Length of the array is given by h_len field in the header.
530 */
531struct logfs_je_spillout {
532	__be64	so_segment[0];
533};
534
535SIZE_CHECK(logfs_je_spillout, 0);
536
537/**
538 * struct logfs_je_journal_ec - erase counts for all journal segments
539 *
540 * @ec:				erase count
541 *
542 * Length of the array is given by h_len field in the header.
543 */
544struct logfs_je_journal_ec {
545	__be32	ec[0];
546};
547
548SIZE_CHECK(logfs_je_journal_ec, 0);
549
550/**
551 * struct logfs_je_free_segments - list of free segmetns with erase count
552 */
553struct logfs_je_free_segments {
554	__be32	segno;
555	__be32	ec;
556};
557
558SIZE_CHECK(logfs_je_free_segments, 8);
559
560/**
561 * struct logfs_seg_alias - list of segment aliases
562 */
563struct logfs_seg_alias {
564	__be32	old_segno;
565	__be32	new_segno;
566};
567
568SIZE_CHECK(logfs_seg_alias, 8);
569
570/**
571 * struct logfs_obj_alias - list of object aliases
572 */
573struct logfs_obj_alias {
574	__be64	ino;
575	__be64	bix;
576	__be64	val;
577	u8	level;
578	u8	pad[5];
579	__be16	child_no;
580};
581
582SIZE_CHECK(logfs_obj_alias, 32);
583
/*
 * Compression types.
 *
 * COMPR_NONE	- uncompressed
 * COMPR_ZLIB	- compressed with zlib
 */
enum {
	COMPR_NONE	= 0,
	COMPR_ZLIB	= 1,
};

/*
 * Journal entries come in groups of 16.  First group contains unique
 * entries, next groups contain one entry per level
 *
 * JE_FIRST	- smallest possible journal entry number
 *
 * JEG_BASE	- base group, containing unique entries
 * JE_COMMIT	- commit entry, validates all previous entries
 * JE_DYNSB	- dynamic superblock, anything that ought to be in the
 *		  superblock but cannot because it is read-write data
 * JE_ANCHOR	- anchor aka master inode aka inode file's inode
 * JE_ERASECOUNT  erasecounts for all journal segments
 * JE_SPILLOUT	- unused
 * JE_OBJ_ALIAS	- object aliases
 * JE_AREA	- area description
 *
 * JE_LAST	- largest possible journal entry number
 *
 * No entry is defined for JE_FIRST itself (0x01), and none for segment
 * aliases.
 */
enum {
	JE_FIRST	= 0x01,

	JEG_BASE	= 0x00,
	JE_COMMIT	= 0x02,
	JE_DYNSB	= 0x03,
	JE_ANCHOR	= 0x04,
	JE_ERASECOUNT	= 0x05,
	JE_SPILLOUT	= 0x06,
	JE_OBJ_ALIAS	= 0x0d,
	JE_AREA		= 0x0e,

	JE_LAST		= 0x0e,
};

628#endif
629