1/*
2 * Copyright (C) Sistina Software, Inc.  1997-2003 All rights reserved.
3 * Copyright (C) 2004-2008 Red Hat, Inc.  All rights reserved.
4 *
5 * This copyrighted material is made available to anyone wishing to use,
6 * modify, copy, or redistribute it subject to the terms and conditions
7 * of the GNU General Public License version 2.
8 */
9
10#ifndef __INCORE_DOT_H__
11#define __INCORE_DOT_H__
12
13#include <linux/fs.h>
14#include <linux/workqueue.h>
15#include <linux/dlm.h>
16#include <linux/buffer_head.h>
17
/*
 * DIO_* flag bits.  The three values occupy distinct bits, so they can be
 * OR-ed together.  The code that interprets them is outside this header.
 */
#define DIO_WAIT	0x00000010
#define DIO_METADATA	0x00000020
#define DIO_ALL		0x00000100
21
/*
 * Forward declarations, so that the structures below can hold pointers to
 * types that are only defined further down in this header (or, in the case
 * of struct lm_lockops, not defined in this header at all).
 */
struct gfs2_log_operations;
struct gfs2_log_element;
struct gfs2_holder;
struct gfs2_glock;
struct gfs2_quota_data;
struct gfs2_trans;
struct gfs2_ail;
struct gfs2_jdesc;
struct gfs2_sbd;
struct lm_lockops;
32
/*
 * Pointer to a function taking a glock and an unsigned int named ret.
 * NOTE(review): nothing in this header uses this typedef; check for
 * remaining users before relying on it.
 */
typedef void (*gfs2_glop_bh_t) (struct gfs2_glock *gl, unsigned int ret);
34
/*
 * In-core log header fields.
 * NOTE(review): the _host suffix suggests this is the CPU-byte-order
 * counterpart of an on-disk log header; confirm against the on-disk
 * format definitions.
 */
struct gfs2_log_header_host {
	u64 lh_sequence;	/* Sequence number of this transaction */
	u32 lh_flags;		/* GFS2_LOG_HEAD_... */
	u32 lh_tail;		/* Block number of log tail */
	u32 lh_blkno;		/* presumably the block number of this header - TODO confirm */
	u32 lh_hash;		/* presumably a hash/checksum of the header - TODO confirm */
};
42
43/*
44 * Structure of operations that are associated with each
45 * type of element in the log.
46 */
47
/*
 * One table of callbacks per log element type; a gfs2_log_element points
 * at its table through le_ops.  The implementations live outside this
 * header, so the groupings below are based on the hook names and
 * signatures only.
 *
 * NOTE(review): struct gfs2_log_descriptor (used by lo_scan_elements) is
 * not declared anywhere in this header, so it has to be visible before
 * this header is included.
 */
struct gfs2_log_operations {
	/* Hooks that take the superblock: add an element, before/after commit */
	void (*lo_add) (struct gfs2_sbd *sdp, struct gfs2_log_element *le);
	void (*lo_before_commit) (struct gfs2_sbd *sdp);
	void (*lo_after_commit) (struct gfs2_sbd *sdp, struct gfs2_ail *ai);
	/* Hooks that take a journal descriptor and a pass number */
	void (*lo_before_scan) (struct gfs2_jdesc *jd,
				struct gfs2_log_header_host *head, int pass);
	int (*lo_scan_elements) (struct gfs2_jdesc *jd, unsigned int start,
				 struct gfs2_log_descriptor *ld, __be64 *ptr,
				 int pass);
	void (*lo_after_scan) (struct gfs2_jdesc *jd, int error, int pass);
	const char *lo_name;	/* name string for this set of operations */
};
60
/*
 * Log element: a list link plus a pointer to the operations table for the
 * element's type.  It is embedded in other structures in this header
 * (rd_le in struct gfs2_rgrpd, bd_le in struct gfs2_bufdata).
 */
struct gfs2_log_element {
	struct list_head le_list;
	const struct gfs2_log_operations *le_ops;
};
65
/* NOTE(review): presumably a flag for gfs2_bitmap.bi_flags - TODO confirm */
#define GBF_FULL 1

/*
 * One bitmap descriptor; struct gfs2_rgrpd points at these through rd_bits.
 * The meaning of the offset/start/len fields is not visible in this header.
 */
struct gfs2_bitmap {
	struct buffer_head *bi_bh;	/* buffer head associated with this bitmap */
	char *bi_clone;
	unsigned long bi_flags;
	u32 bi_offset;
	u32 bi_start;
	u32 bi_len;
};
76
/*
 * In-core descriptor for one resource group (rgrp).  It carries a link for
 * a superblock list (rd_list), its own glock (rd_gl), a log element
 * (rd_le), a back pointer to the superblock (rd_sbd) and a pointer to its
 * bitmap descriptors (rd_bits).
 */
struct gfs2_rgrpd {
	struct list_head rd_list;	/* Link with superblock */
	struct list_head rd_list_mru;
	struct gfs2_glock *rd_gl;	/* Glock for this rgrp */
	u64 rd_addr;			/* grp block disk address */
	u64 rd_data0;			/* first data location */
	u32 rd_length;			/* length of rgrp header in fs blocks */
	u32 rd_data;			/* num of data blocks in rgrp */
	u32 rd_bitbytes;		/* number of bytes in data bitmaps */
	u32 rd_free;
	u32 rd_free_clone;
	u32 rd_dinodes;
	u64 rd_igeneration;
	struct gfs2_bitmap *rd_bits;
	struct mutex rd_mutex;
	struct gfs2_log_element rd_le;
	struct gfs2_sbd *rd_sbd;
	unsigned int rd_bh_count;
	u32 rd_last_alloc;
	u32 rd_flags;
/*
 * The GFS2_RDF_* values all sit in the top four bits; GFS2_RDF_MASK covers
 * exactly those bits.  NOTE(review): presumably stored in rd_flags, which
 * is declared directly above - confirm.
 */
#define GFS2_RDF_CHECK		0x10000000 /* check for unlinked inodes */
#define GFS2_RDF_UPTODATE	0x20000000 /* rg is up to date */
#define GFS2_RDF_ERROR		0x40000000 /* error in rg */
#define GFS2_RDF_MASK		0xf0000000 /* mask for internal flags */
};
102
/*
 * GFS2-private buffer_head state bits, numbered from BH_PrivateStart
 * upwards.
 */
enum gfs2_state_bits {
	BH_Pinned = BH_PrivateStart,
	BH_Escaped = BH_PrivateStart + 1,
};

/*
 * BUFFER_FNS() and TAS_BUFFER_FNS() come from <linux/buffer_head.h>; they
 * generate the per-bit accessor helpers for the two bits declared above.
 */
BUFFER_FNS(Pinned, pinned)
TAS_BUFFER_FNS(Pinned, pinned)
BUFFER_FNS(Escaped, escaped)
TAS_BUFFER_FNS(Escaped, escaped)
112
/*
 * Per-buffer bookkeeping: ties a buffer_head to a glock, carries a log
 * element, and holds the AIL pointer and list links.
 */
struct gfs2_bufdata {
	struct buffer_head *bd_bh;
	struct gfs2_glock *bd_gl;

	/*
	 * list_tr and blkno share the same storage, so only one of
	 * bd_list_tr / bd_blkno is meaningful at any time.
	 */
	union {
		struct list_head list_tr;
		u64 blkno;
	} u;
/* Shorthand so users can write bd->bd_list_tr instead of bd->u.list_tr */
#define bd_list_tr u.list_tr
#define bd_blkno u.blkno

	struct gfs2_log_element bd_le;

	struct gfs2_ail *bd_ail;
	struct list_head bd_ail_st_list;
	struct list_head bd_ail_gl_list;
};
130
131/*
132 * Internally, we prefix things with gdlm_ and GDLM_ (for gfs-dlm) since a
133 * prefix of lock_dlm_ gets awkward.
134 */
135
/* Size in bytes of gfs2_glock.gl_strname */
#define GDLM_STRNAME_BYTES	25
/*
 * NOTE(review): presumably the lock value block size handed to the DLM;
 * gfs2_glock.gl_lvb is declared with a literal 32 rather than this name.
 */
#define GDLM_LVB_SIZE		32

/* NOTE(review): presumably a bit number for lm_lockstruct.ls_flags - confirm */
enum {
	DFL_BLOCK_LOCKS		= 0,
};
142
/*
 * Name of a lock: a 64-bit number plus a type.  Embedded in struct
 * gfs2_glock as gl_name and compared with lm_name_equal().
 */
struct lm_lockname {
	u64 ln_number;
	unsigned int ln_type;
};
147
/*
 * lm_name_equal - compare two lock names
 * @name1: pointer to an object with ln_number and ln_type members
 * @name2: pointer to an object with ln_number and ln_type members
 *
 * Evaluates to non-zero when both the number and the type match, zero
 * otherwise.  The number is compared first; the type is only looked at
 * when the numbers agree.  This is a macro, so each argument can be
 * evaluated up to twice: do not pass expressions with side effects.
 */
#define lm_name_equal(name1, name2)				\
	((name1)->ln_number == (name2)->ln_number &&		\
	 (name1)->ln_type == (name2)->ln_type)
151
152
/*
 * Table of callbacks and constants for a glock; struct gfs2_glock points
 * at one of these through gl_ops.  The callers of these hooks are outside
 * this header, so their exact contracts are not documented here.
 */
struct gfs2_glock_operations {
	void (*go_xmote_th) (struct gfs2_glock *gl);
	int (*go_xmote_bh) (struct gfs2_glock *gl, struct gfs2_holder *gh);
	void (*go_inval) (struct gfs2_glock *gl, int flags);
	int (*go_demote_ok) (const struct gfs2_glock *gl);
	int (*go_lock) (struct gfs2_holder *gh);
	void (*go_unlock) (struct gfs2_holder *gh);
	int (*go_dump)(struct seq_file *seq, const struct gfs2_glock *gl);
	void (*go_callback) (struct gfs2_glock *gl);
	/* The remaining members are const: fixed once the table is initialised */
	const int go_type;
	const unsigned long go_min_hold_time;
	const unsigned long go_flags;
/* NOTE(review): presumably a go_flags value - confirm */
#define GLOF_ASPACE 1
};
167
/*
 * HIF_* values, used with gfs2_holder.gh_iflags.  The numbering is sparse
 * (6, 7, 10).  NOTE(review): these look like bit numbers rather than
 * masks - confirm at the call sites.
 */
enum {
	/* States */
	HIF_HOLDER		= 6,  /* Set for gh that "holds" the glock */
	HIF_FIRST		= 7,
	HIF_WAIT		= 10,
};
174
/*
 * A holder: one request for, or hold on, a glock.  Instances are embedded
 * throughout this header (in gfs2_inode, gfs2_alloc, gfs2_trans, gfs2_sbd
 * and others).
 */
struct gfs2_holder {
	struct list_head gh_list;	/* presumably links into gfs2_glock.gl_holders - TODO confirm */

	struct gfs2_glock *gh_gl;	/* the glock this holder refers to */
	struct pid *gh_owner_pid;
	unsigned int gh_state;
	unsigned gh_flags;

	int gh_error;
	unsigned long gh_iflags; /* HIF_... */
	unsigned long gh_ip;
};
187
/*
 * GLF_* values, used with gfs2_glock.gl_flags.  Values 0 and 2 are not
 * assigned.  NOTE(review): these look like bit numbers rather than masks -
 * confirm at the call sites.
 */
enum {
	GLF_LOCK			= 1,
	GLF_DEMOTE			= 3,
	GLF_PENDING_DEMOTE		= 4,
	GLF_DEMOTE_IN_PROGRESS		= 5,
	GLF_DIRTY			= 6,
	GLF_LFLUSH			= 7,
	GLF_INVALIDATE_IN_PROGRESS	= 8,
	GLF_REPLY_PENDING		= 9,
	GLF_INITIAL			= 10,
	GLF_FROZEN			= 11,
};
200
/*
 * In-core glock.  It is identified by gl_name, reference counted through
 * gl_ref, and has a spinlock (gl_spin), a list head for holders
 * (gl_holders), an operations table (gl_ops), a DLM lock status block
 * (gl_lksb) and work items (gl_work, gl_delete).
 */
struct gfs2_glock {
	struct hlist_node gl_list;
	unsigned long gl_flags;		/* GLF_... */
	struct lm_lockname gl_name;
	atomic_t gl_ref;

	spinlock_t gl_spin;

	unsigned int gl_state;
	unsigned int gl_target;
	unsigned int gl_reply;
	unsigned int gl_hash;
	unsigned int gl_req;
	unsigned int gl_demote_state; /* state requested by remote node */
	unsigned long gl_demote_time; /* time of first demote request */
	struct list_head gl_holders;

	const struct gfs2_glock_operations *gl_ops;
	char gl_strname[GDLM_STRNAME_BYTES];
	struct dlm_lksb gl_lksb;
	/*
	 * NOTE(review): the literal 32 equals GDLM_LVB_SIZE (defined earlier
	 * in this header); consider using the named constant so the two
	 * cannot drift apart.
	 */
	char gl_lvb[32];
	unsigned long gl_tchange;
	void *gl_object;

	struct list_head gl_lru;

	struct gfs2_sbd *gl_sbd;	/* back pointer to the superblock data */

	struct list_head gl_ail_list;
	atomic_t gl_ail_count;
	struct delayed_work gl_work;
	struct work_struct gl_delete;
};
234
#define GFS2_MIN_LVB_SIZE 32	/* Min size of LVB that gfs2 supports */

/*
 * Allocation context, reachable from struct gfs2_inode through i_alloc.
 * It groups quota state with the holders and resource group used by a
 * block reservation.  MAXQUOTAS is not defined in this header.
 */
struct gfs2_alloc {
	/* Quota stuff */

	struct gfs2_quota_data *al_qd[2*MAXQUOTAS];
	struct gfs2_holder al_qd_ghs[2*MAXQUOTAS];
	unsigned int al_qd_num;	/* presumably the number of al_qd[] slots in use - TODO confirm */

	u32 al_requested; /* Filled in by caller of gfs2_inplace_reserve() */
	u32 al_alloced; /* Filled in by gfs2_alloc_*() */

	/* Filled in by gfs2_inplace_reserve() */

	unsigned int al_line;
	char *al_file;
	struct gfs2_holder al_ri_gh;
	struct gfs2_holder al_rgd_gh;
	struct gfs2_rgrpd *al_rgd;

};
256
/*
 * GIF_* values, used with gfs2_inode.i_flags.  Value 2 is not assigned.
 * NOTE(review): these look like bit numbers rather than masks - confirm.
 */
enum {
	GIF_INVALID		= 0,
	GIF_QD_LOCKED		= 1,
	GIF_SW_PAGED		= 3,
};
262
263
/*
 * In-core GFS2 inode.  The VFS inode is embedded as the first member
 * (i_inode); GFS2_I() converts a struct inode pointer back to the
 * enclosing gfs2_inode.
 */
struct gfs2_inode {
	struct inode i_inode;	/* embedded VFS inode, first member */
	u64 i_no_addr;
	u64 i_no_formal_ino;
	u64 i_generation;
	u64 i_eattr;
	loff_t i_disksize;
	unsigned long i_flags;		/* GIF_... */
	struct gfs2_glock *i_gl; /* Move into i_gh? */
	struct gfs2_holder i_iopen_gh;
	struct gfs2_holder i_gh; /* for prepare/commit_write only */
	struct gfs2_alloc *i_alloc;
	u64 i_goal;	/* goal block for allocations */
	struct rw_semaphore i_rw_mutex;
	struct list_head i_trunc_list;
	u32 i_entries;
	u32 i_diskflags;
	u8 i_height;
	u8 i_depth;
};
284
285/*
286 * Since i_inode is the first element of struct gfs2_inode,
287 * this is effectively a cast.
288 */
289static inline struct gfs2_inode *GFS2_I(struct inode *inode)
290{
291	return container_of(inode, struct gfs2_inode, i_inode);
292}
293
294static inline struct gfs2_sbd *GFS2_SB(const struct inode *inode)
295{
296	return inode->i_sb->s_fs_info;
297}
298
/*
 * Per-file state: a mutex plus a glock holder.
 * NOTE(review): the f_fl_ prefix suggests these serve flock handling -
 * confirm at the call sites.
 */
struct gfs2_file {
	struct mutex f_fl_mutex;
	struct gfs2_holder f_fl_gh;
};
303
/*
 * One revoke entry: a list link, a block number and an rr_where value
 * whose meaning is not visible in this header.
 * NOTE(review): presumably linked on gfs2_sbd.sd_revoke_list - confirm.
 */
struct gfs2_revoke_replay {
	struct list_head rr_list;
	u64 rr_blkno;
	unsigned int rr_where;
};
309
/*
 * QDF_* values, used with gfs2_quota_data.qd_flags.
 * NOTE(review): these look like bit numbers rather than masks - confirm.
 */
enum {
	QDF_USER		= 0,
	QDF_CHANGE		= 1,
	QDF_LOCKED		= 2,
};
315
/*
 * In-core quota record.  qd_qb embeds a struct gfs2_quota_lvb by value and
 * qd_bh_qc points at a struct gfs2_quota_change; neither type is declared
 * in this header, so both must be visible before it is included.
 */
struct gfs2_quota_data {
	struct list_head qd_list;
	struct list_head qd_reclaim;

	atomic_t qd_count;	/* presumably a reference count - TODO confirm */

	u32 qd_id;
	unsigned long qd_flags;		/* QDF_... */

	s64 qd_change;
	s64 qd_change_sync;

	unsigned int qd_slot;
	unsigned int qd_slot_count;

	struct buffer_head *qd_bh;
	struct gfs2_quota_change *qd_bh_qc;
	unsigned int qd_bh_count;

	struct gfs2_glock *qd_gl;
	struct gfs2_quota_lvb qd_qb;

	u64 qd_sync_gen;
	unsigned long qd_last_warn;
};
341
/*
 * In-core transaction: block/revoke counts, a glock holder, a "touched"
 * marker, per-category buffer counters and a buffer list head.
 */
struct gfs2_trans {
	unsigned long tr_ip;

	unsigned int tr_blocks;
	unsigned int tr_revokes;
	unsigned int tr_reserved;

	struct gfs2_holder tr_t_gh;

	int tr_touched;

	/* Buffer and data-buffer counters, with the list they relate to */
	unsigned int tr_num_buf;
	unsigned int tr_num_buf_new;
	unsigned int tr_num_databuf_new;
	unsigned int tr_num_buf_rm;
	unsigned int tr_num_databuf_rm;
	struct list_head tr_list_buf;

	/* Revoke counters */
	unsigned int tr_num_revoke;
	unsigned int tr_num_revoke_rm;
};
363
/*
 * AIL entry: a list link, two list heads (ail1 and ail2) and a sync
 * generation number.  Referenced from gfs2_bufdata.bd_ail and passed to
 * the lo_after_commit log operation.
 */
struct gfs2_ail {
	struct list_head ai_list;

	unsigned int ai_first;
	struct list_head ai_ail1_list;
	struct list_head ai_ail2_list;

	u64 ai_sync_gen;
};
373
/*
 * Maps a run of journal logical blocks to disk blocks.
 * NOTE(review): presumably linked on gfs2_jdesc.extent_list - confirm.
 */
struct gfs2_journal_extent {
	struct list_head extent_list;

	unsigned int lblock; /* First logical block */
	u64 dblock; /* First disk block */
	u64 blocks; /* presumably the extent length in blocks - TODO confirm */
};
381
/*
 * Journal descriptor: list link, extent list head, a work item, the
 * journal's inode, flags, the journal id and a block count.
 */
struct gfs2_jdesc {
	struct list_head jd_list;
	struct list_head extent_list;
	struct work_struct jd_work;
	struct inode *jd_inode;
	unsigned long jd_flags;
/* NOTE(review): presumably a jd_flags value (declared directly above) - confirm */
#define JDF_RECOVERY 1
	unsigned int jd_jid;
	unsigned int jd_blocks;
};
392
/*
 * Signed statfs counters: total, free and dinodes.  Used for both
 * sd_statfs_master and sd_statfs_local in struct gfs2_sbd.
 */
struct gfs2_statfs_change_host {
	s64 sc_total;
	s64 sc_free;
	s64 sc_dinodes;
};
398
/*
 * Values for the mount-argument bitfields in struct gfs2_args.  Each
 * *_DEFAULT macro names a macro defined just below it; that is fine
 * because macros are only expanded where they are used.  All values fit
 * the 2-bit ar_quota / ar_data / ar_errors fields.
 */

/* gfs2_args.ar_quota: off/account/on */
#define GFS2_QUOTA_DEFAULT	GFS2_QUOTA_OFF
#define GFS2_QUOTA_OFF		0
#define GFS2_QUOTA_ACCOUNT	1
#define GFS2_QUOTA_ON		2

/* gfs2_args.ar_data: ordered/writeback */
#define GFS2_DATA_DEFAULT	GFS2_DATA_ORDERED
#define GFS2_DATA_WRITEBACK	1
#define GFS2_DATA_ORDERED	2

/* gfs2_args.ar_errors */
#define GFS2_ERRORS_DEFAULT     GFS2_ERRORS_WITHDRAW
#define GFS2_ERRORS_WITHDRAW    0
#define GFS2_ERRORS_CONTINUE    1 /* place holder for future feature */
#define GFS2_ERRORS_RO          2 /* place holder for future feature */
#define GFS2_ERRORS_PANIC       3
413
/*
 * Mount arguments; stored in struct gfs2_sbd as sd_args.
 * GFS2_LOCKNAME_LEN is not defined in this header.
 */
struct gfs2_args {
	char ar_lockproto[GFS2_LOCKNAME_LEN];	/* Name of the Lock Protocol */
	char ar_locktable[GFS2_LOCKNAME_LEN];	/* Name of the Lock Table */
	char ar_hostdata[GFS2_LOCKNAME_LEN];	/* Host specific data */
	unsigned int ar_spectator:1;		/* Don't get a journal */
	unsigned int ar_ignore_local_fs:1;	/* Ignore optimisations */
	unsigned int ar_localflocks:1;		/* Let the VFS do flock|fcntl */
	unsigned int ar_localcaching:1;		/* Local caching */
	unsigned int ar_debug:1;		/* Oops on errors */
	unsigned int ar_upgrade:1;		/* Upgrade ondisk format */
	unsigned int ar_posix_acl:1;		/* Enable posix acls */
	unsigned int ar_quota:2;		/* off/account/on */
	unsigned int ar_suiddir:1;		/* suiddir support */
	unsigned int ar_data:2;			/* ordered/writeback */
	unsigned int ar_meta:1;			/* mount metafs */
	unsigned int ar_discard:1;		/* discard requests */
	unsigned int ar_errors:2;               /* errors=withdraw | panic */
	unsigned int ar_nobarrier:1;            /* do not send barriers */
	int ar_commit;				/* Commit interval */
	int ar_statfs_quantum;			/* The fast statfs interval */
	int ar_quota_quantum;			/* The quota interval */
	int ar_statfs_percent;			/* The % change to force sync */
};
437
/*
 * Filesystem tuning values; stored in struct gfs2_sbd as sd_tune.
 * NOTE(review): gt_spin presumably protects the fields below - confirm.
 */
struct gfs2_tune {
	spinlock_t gt_spin;

	unsigned int gt_logd_secs;

	unsigned int gt_quota_simul_sync; /* Max quotavals to sync at once */
	unsigned int gt_quota_warn_period; /* Secs between quota warn msgs */
	unsigned int gt_quota_scale_num; /* Numerator */
	unsigned int gt_quota_scale_den; /* Denominator */
	unsigned int gt_quota_quantum; /* Secs between syncs to quota file */
	unsigned int gt_new_files_jdata;
	unsigned int gt_max_readahead; /* Max bytes to read-ahead from disk */
	unsigned int gt_complain_secs;
	unsigned int gt_statfs_quantum;
	unsigned int gt_statfs_slow;
};
454
/*
 * SDF_* values, used with gfs2_sbd.sd_flags.
 * NOTE(review): these look like bit numbers rather than masks - confirm.
 */
enum {
	SDF_JOURNAL_CHECKED	= 0,
	SDF_JOURNAL_LIVE	= 1,
	SDF_SHUTDOWN		= 2,
	SDF_NOBARRIERS		= 3,
	SDF_NORECOVERY		= 4,
	SDF_DEMOTE		= 5,
	SDF_NOJOURNALID		= 6,
};
464
/* Size of the sd_fsname, sd_table_name and sd_proto_name arrays in gfs2_sbd */
#define GFS2_FSNAME_LEN		256

/*
 * Inode number pair: formal inode number plus address.  Used for
 * sb_master_dir and sb_root_dir in struct gfs2_sb_host.
 */
struct gfs2_inum_host {
	u64 no_formal_ino;
	u64 no_addr;
};
471
/*
 * In-core superblock fields; stored in struct gfs2_sbd as sd_sb.
 * NOTE(review): the _host suffix suggests a CPU-byte-order copy of the
 * on-disk superblock - confirm.  GFS2_LOCKNAME_LEN is not defined in this
 * header.
 */
struct gfs2_sb_host {
	u32 sb_magic;
	u32 sb_type;
	u32 sb_format;

	u32 sb_fs_format;
	u32 sb_multihost_format;
	u32 sb_bsize;
	u32 sb_bsize_shift;

	struct gfs2_inum_host sb_master_dir;
	struct gfs2_inum_host sb_root_dir;

	char sb_lockproto[GFS2_LOCKNAME_LEN];
	char sb_locktable[GFS2_LOCKNAME_LEN];
	u8 sb_uuid[16];
};
489
/*
 * lm_mount() return values
 *
 * ls_jid - the journal ID this node should use
 * ls_first - this node is the first to mount the file system
 * ls_dlm - lock module's context for this file system
 *          (NOTE(review): this comment used to name the field
 *          ls_lockspace, which is not a member of the struct; ls_dlm
 *          looks like its replacement - confirm)
 * ls_ops - lock module's functions
 *
 * The remaining members (ls_first_done, ls_nodir, ls_flags and the
 * ls_recover_jid_* pair) are not described here; their users are outside
 * this header.
 */

struct lm_lockstruct {
	unsigned int ls_jid;
	unsigned int ls_first;
	unsigned int ls_first_done;
	unsigned int ls_nodir;
	const struct lm_lockops *ls_ops;
	unsigned long ls_flags;
	dlm_lockspace_t *ls_dlm;

	int ls_recover_jid_done;
	int ls_recover_jid_status;
};
511
/*
 * In-core per-filesystem state.  GFS2_SB() obtains a pointer to this
 * structure from super_block->s_fs_info, and sd_vfs points back at the
 * VFS super_block.  The members are grouped by subsystem; see the section
 * comments below.
 *
 * GFS2_MAX_META_HEIGHT and GFS2_LOCKNAME_LEN (the latter through the
 * embedded gfs2_sb_host and gfs2_args) are not defined in this header.
 */
struct gfs2_sbd {
	struct super_block *sd_vfs;
	struct kobject sd_kobj;
	unsigned long sd_flags;	/* SDF_... */
	struct gfs2_sb_host sd_sb;

	/* Constants computed on mount */

	u32 sd_fsb2bb;
	u32 sd_fsb2bb_shift;
	u32 sd_diptrs;	/* Number of pointers in a dinode */
	u32 sd_inptrs;	/* Number of pointers in a indirect block */
	u32 sd_jbsize;	/* Size of a journaled data block */
	u32 sd_hash_bsize;	/* sizeof(exhash block) */
	u32 sd_hash_bsize_shift;
	u32 sd_hash_ptrs;	/* Number of pointers in a hash block */
	u32 sd_qc_per_block;
	u32 sd_max_dirres;	/* Max blocks needed to add a directory entry */
	u32 sd_max_height;	/* Max height of a file's metadata tree */
	u64 sd_heightsize[GFS2_MAX_META_HEIGHT + 1];
	u32 sd_max_jheight; /* Max height of journaled file's meta tree */
	u64 sd_jheightsize[GFS2_MAX_META_HEIGHT + 1];

	struct gfs2_args sd_args;	/* Mount arguments */
	struct gfs2_tune sd_tune;	/* Filesystem tuning structure */

	/* Lock Stuff */

	struct lm_lockstruct sd_lockstruct;
	struct gfs2_holder sd_live_gh;
	struct gfs2_glock *sd_rename_gl;
	struct gfs2_glock *sd_trans_gl;
	wait_queue_head_t sd_glock_wait;
	atomic_t sd_glock_disposal;

	/* Inode Stuff */

	struct dentry *sd_master_dir;
	struct dentry *sd_root_dir;

	struct inode *sd_jindex;
	struct inode *sd_statfs_inode;
	struct inode *sd_sc_inode;
	struct inode *sd_qc_inode;
	struct inode *sd_rindex;
	struct inode *sd_quota_inode;

	/* StatFS stuff */

	spinlock_t sd_statfs_spin;
	struct gfs2_statfs_change_host sd_statfs_master;
	struct gfs2_statfs_change_host sd_statfs_local;
	int sd_statfs_force_sync;

	/* Resource group stuff */

	int sd_rindex_uptodate;
	spinlock_t sd_rindex_spin;
	struct mutex sd_rindex_mutex;
	struct list_head sd_rindex_list;
	struct list_head sd_rindex_mru_list;
	struct gfs2_rgrpd *sd_rindex_forward;
	unsigned int sd_rgrps;

	/* Journal index stuff */

	struct list_head sd_jindex_list;
	spinlock_t sd_jindex_spin;
	struct mutex sd_jindex_mutex;
	unsigned int sd_journals;

	struct gfs2_jdesc *sd_jdesc;
	struct gfs2_holder sd_journal_gh;
	struct gfs2_holder sd_jinode_gh;

	struct gfs2_holder sd_sc_gh;
	struct gfs2_holder sd_qc_gh;

	/* Daemon stuff */

	struct task_struct *sd_logd_process;
	struct task_struct *sd_quotad_process;

	/* Quota stuff */

	struct list_head sd_quota_list;
	atomic_t sd_quota_count;
	struct mutex sd_quota_mutex;
	wait_queue_head_t sd_quota_wait;
	struct list_head sd_trunc_list;
	spinlock_t sd_trunc_lock;

	unsigned int sd_quota_slots;
	unsigned int sd_quota_chunks;
	unsigned char **sd_quota_bitmap;

	u64 sd_quota_sync_gen;

	/* Log stuff */

	spinlock_t sd_log_lock;

	/* The "commited" spelling is part of the member names; keep it as is */
	unsigned int sd_log_blks_reserved;
	unsigned int sd_log_commited_buf;
	unsigned int sd_log_commited_databuf;
	int sd_log_commited_revoke;

	atomic_t sd_log_pinned;
	unsigned int sd_log_num_buf;
	unsigned int sd_log_num_revoke;
	unsigned int sd_log_num_rg;
	unsigned int sd_log_num_databuf;

	/* One list head per kind of log element */
	struct list_head sd_log_le_buf;
	struct list_head sd_log_le_revoke;
	struct list_head sd_log_le_rg;
	struct list_head sd_log_le_databuf;
	struct list_head sd_log_le_ordered;

	atomic_t sd_log_thresh1;
	atomic_t sd_log_thresh2;
	atomic_t sd_log_blks_free;
	wait_queue_head_t sd_log_waitq;
	wait_queue_head_t sd_logd_waitq;

	u64 sd_log_sequence;
	unsigned int sd_log_head;
	unsigned int sd_log_tail;
	int sd_log_idle;

	struct rw_semaphore sd_log_flush_lock;
	atomic_t sd_log_in_flight;
	wait_queue_head_t sd_log_flush_wait;

	unsigned int sd_log_flush_head;
	u64 sd_log_flush_wrapped;

	struct list_head sd_ail1_list;
	struct list_head sd_ail2_list;
	u64 sd_ail_sync_gen;

	/* Replay stuff */

	struct list_head sd_revoke_list;
	unsigned int sd_replay_tail;

	unsigned int sd_found_blocks;
	unsigned int sd_found_revokes;
	unsigned int sd_replayed_blocks;

	/* For quiescing the filesystem */

	struct gfs2_holder sd_freeze_gh;
	struct mutex sd_freeze_lock;
	unsigned int sd_freeze_count;

	char sd_fsname[GFS2_FSNAME_LEN];
	char sd_table_name[GFS2_FSNAME_LEN];
	char sd_proto_name[GFS2_FSNAME_LEN];

	/* Debugging crud */

	unsigned long sd_last_warning;
	struct dentry *debugfs_dir;    /* debugfs directory */
	struct dentry *debugfs_dentry_glocks; /* for debugfs */
};
678
679#endif /* __INCORE_DOT_H__ */
680