/* SPDX-License-Identifier: GPL-2.0-only */
/*
 * kernfs.h - pseudo filesystem decoupled from vfs locking
 */
5b8441ed2STejun Heo
6b8441ed2STejun Heo#ifndef __LINUX_KERNFS_H
7b8441ed2STejun Heo#define __LINUX_KERNFS_H
8b8441ed2STejun Heo
95d0e26bbSTejun Heo#include <linux/err.h>
10dd8a5b03STejun Heo#include <linux/list.h>
11dd8a5b03STejun Heo#include <linux/mutex.h>
12bc755553STejun Heo#include <linux/idr.h>
13517e64f5STejun Heo#include <linux/lockdep.h>
14cf9e5a73STejun Heo#include <linux/rbtree.h>
15cf9e5a73STejun Heo#include <linux/atomic.h>
1679f1c730SAndy Shevchenko#include <linux/bug.h>
1779f1c730SAndy Shevchenko#include <linux/types.h>
18488dee96SDmitry Torokhov#include <linux/uidgid.h>
19abd54f02STejun Heo#include <linux/wait.h>
20393c3714SMinchan Kim#include <linux/rwsem.h>
21879f40d1STejun Heo
/*
 * Forward declarations: this header only handles these types through
 * pointers, so their full definitions are not required here.
 */
struct file;
struct dentry;
struct iattr;
struct seq_file;
struct vm_area_struct;
struct vm_operations_struct;
struct super_block;
struct file_system_type;
struct poll_table_struct;
struct fs_context;

/* kernfs types that are referenced before (or without) being defined here */
struct kernfs_fs_context;
struct kernfs_open_node;
struct kernfs_iattrs;
36cf9e5a73STejun Heo
/*
 * Node type.  The value lives in the bits of kernfs_node->flags selected
 * by KERNFS_TYPE_MASK and is extracted by kernfs_type().
 */
enum kernfs_node_type {
	KERNFS_DIR		= 0x0001,
	KERNFS_FILE		= 0x0002,
	KERNFS_LINK		= 0x0004,
};
42cf9e5a73STejun Heo
/* Bits of kernfs_node->flags that hold the enum kernfs_node_type value. */
#define KERNFS_TYPE_MASK		0x000f
/*
 * Every bit outside the type bits.  The expansion is parenthesized so it
 * always behaves as a single operand at the point of use.
 */
#define KERNFS_FLAG_MASK		(~KERNFS_TYPE_MASK)
/*
 * NOTE(review): presumably limits applied to user xattrs on kernfs nodes
 * (count and total size); the enforcement is not in this header - confirm
 * against the xattr handlers.
 */
#define KERNFS_MAX_USER_XATTRS		128
#define KERNFS_USER_XATTR_SIZE_LIMIT	(128 << 10)	/* 131072, i.e. 128 KiB */
47cf9e5a73STejun Heo
/*
 * Flag bits for kernfs_node->flags.  Every value lies above
 * KERNFS_TYPE_MASK (0x000f), so a flag never overlaps the node type
 * stored in the same field.  0x0200 is not assigned in this enum.
 */
enum kernfs_node_flag {
	KERNFS_ACTIVATED	= 0x0010,
	KERNFS_NS		= 0x0020,
	KERNFS_HAS_SEQ_SHOW	= 0x0040,
	KERNFS_HAS_MMAP		= 0x0080,
	KERNFS_LOCKDEP		= 0x0100,
	KERNFS_SUICIDAL		= 0x0400,
	KERNFS_SUICIDED		= 0x0800,
	KERNFS_EMPTY_DIR	= 0x1000,
	KERNFS_HAS_RELEASE	= 0x2000,
};
59cf9e5a73STejun Heo
/*
 * @flags for kernfs_create_root().  The values are distinct single bits
 * and are stored in kernfs_root->flags.
 */
enum kernfs_root_flag {
	/*
	 * kernfs_nodes are created in the deactivated state and invisible.
	 * They require explicit kernfs_activate() to become visible.  This
	 * can be used to make related nodes become visible atomically
	 * after all nodes are created successfully.
	 */
	KERNFS_ROOT_CREATE_DEACTIVATED		= 0x0001,

	/*
	 * For regular files, if the opener has CAP_DAC_OVERRIDE, open(2)
	 * succeeds regardless of the RW permissions.  sysfs had an extra
	 * layer of enforcement where open(2) fails with -EACCES regardless
	 * of CAP_DAC_OVERRIDE if the permission doesn't have the
	 * respective read or write access at all (none of S_IRUGO or
	 * S_IWUGO) or the respective operation isn't implemented.  The
	 * following flag enables that behavior.
	 */
	KERNFS_ROOT_EXTRA_OPEN_PERM_CHECK	= 0x0002,

	/*
	 * The filesystem supports exportfs operation, so userspace can use
	 * fhandle to access nodes of the fs.
	 */
	KERNFS_ROOT_SUPPORT_EXPORTOP		= 0x0004,

	/*
	 * Support user xattrs to be written to nodes rooted at this root.
	 */
	KERNFS_ROOT_SUPPORT_USER_XATTR		= 0x0008,
};
92d35258efSTejun Heo
93324a56e1STejun Heo/* type-specific structures for kernfs_node union members */
94324a56e1STejun Heostruct kernfs_elem_dir {
95cf9e5a73STejun Heo	unsigned long		subdirs;
96adc5e8b5STejun Heo	/* children rbtree starts here and goes through kn->rb */
97cf9e5a73STejun Heo	struct rb_root		children;
98cf9e5a73STejun Heo
99cf9e5a73STejun Heo	/*
100cf9e5a73STejun Heo	 * The kernfs hierarchy this directory belongs to.  This fits
101324a56e1STejun Heo	 * better directly in kernfs_node but is here to save space.
102cf9e5a73STejun Heo	 */
103cf9e5a73STejun Heo	struct kernfs_root	*root;
104895adbecSIan Kent	/*
105895adbecSIan Kent	 * Monotonic revision counter, used to identify if a directory
106895adbecSIan Kent	 * node has changed during negative dentry revalidation.
107895adbecSIan Kent	 */
108895adbecSIan Kent	unsigned long		rev;
109cf9e5a73STejun Heo};
110cf9e5a73STejun Heo
/*
 * Payload of the kernfs_node union's "symlink" member: a pointer to
 * another kernfs_node.
 */
struct kernfs_elem_symlink {
	struct kernfs_node	*target_kn;
};
114cf9e5a73STejun Heo
115324a56e1STejun Heostruct kernfs_elem_attr {
116cf9e5a73STejun Heo	const struct kernfs_ops	*ops;
117c525aaddSTejun Heo	struct kernfs_open_node	*open;
118cf9e5a73STejun Heo	loff_t			size;
119ecca47ceSTejun Heo	struct kernfs_node	*notify_next;	/* for kernfs_notify() */
120cf9e5a73STejun Heo};
121cf9e5a73STejun Heo
122cf9e5a73STejun Heo/*
123324a56e1STejun Heo * kernfs_node - the building block of kernfs hierarchy.  Each and every
124324a56e1STejun Heo * kernfs node is represented by single kernfs_node.  Most fields are
125cf9e5a73STejun Heo * private to kernfs and shouldn't be accessed directly by kernfs users.
126cf9e5a73STejun Heo *
12721774fd8SWillem de Bruijn * As long as count reference is held, the kernfs_node itself is
128324a56e1STejun Heo * accessible.  Dereferencing elem or any other outer entity requires
129324a56e1STejun Heo * active reference.
130cf9e5a73STejun Heo */
131324a56e1STejun Heostruct kernfs_node {
132adc5e8b5STejun Heo	atomic_t		count;
133adc5e8b5STejun Heo	atomic_t		active;
134cf9e5a73STejun Heo#ifdef CONFIG_DEBUG_LOCK_ALLOC
135cf9e5a73STejun Heo	struct lockdep_map	dep_map;
136cf9e5a73STejun Heo#endif
1373eef34adSTejun Heo	/*
1383eef34adSTejun Heo	 * Use kernfs_get_parent() and kernfs_name/path() instead of
1393eef34adSTejun Heo	 * accessing the following two fields directly.  If the node is
1403eef34adSTejun Heo	 * never moved to a different parent, it is safe to access the
1413eef34adSTejun Heo	 * parent directly.
1423eef34adSTejun Heo	 */
143adc5e8b5STejun Heo	struct kernfs_node	*parent;
144adc5e8b5STejun Heo	const char		*name;
145cf9e5a73STejun Heo
146adc5e8b5STejun Heo	struct rb_node		rb;
147cf9e5a73STejun Heo
148adc5e8b5STejun Heo	const void		*ns;	/* namespace tag */
1499b0925a6SGreg Kroah-Hartman	unsigned int		hash;	/* ns + name hash */
150cf9e5a73STejun Heo	union {
151adc5e8b5STejun Heo		struct kernfs_elem_dir		dir;
152adc5e8b5STejun Heo		struct kernfs_elem_symlink	symlink;
153adc5e8b5STejun Heo		struct kernfs_elem_attr		attr;
154cf9e5a73STejun Heo	};
155cf9e5a73STejun Heo
156cf9e5a73STejun Heo	void			*priv;
157cf9e5a73STejun Heo
15867c0496eSTejun Heo	/*
15940430452STejun Heo	 * 64bit unique ID.  On 64bit ino setups, id is the ino.  On 32bit,
16040430452STejun Heo	 * the low 32bits are ino and upper generation.
16167c0496eSTejun Heo	 */
16267c0496eSTejun Heo	u64			id;
16367c0496eSTejun Heo
164adc5e8b5STejun Heo	unsigned short		flags;
165adc5e8b5STejun Heo	umode_t			mode;
166c525aaddSTejun Heo	struct kernfs_iattrs	*iattr;
167cf9e5a73STejun Heo};
168b8441ed2STejun Heo
16980b9bbefSTejun Heo/*
17090c07c89STejun Heo * kernfs_syscall_ops may be specified on kernfs_create_root() to support
17190c07c89STejun Heo * syscalls.  These optional callbacks are invoked on the matching syscalls
17290c07c89STejun Heo * and can perform any kernfs operations which don't necessarily have to be
17390c07c89STejun Heo * the exact operation requested.  An active reference is held for each
17490c07c89STejun Heo * kernfs_node parameter.
17580b9bbefSTejun Heo */
17690c07c89STejun Heostruct kernfs_syscall_ops {
1776a7fed4eSTejun Heo	int (*show_options)(struct seq_file *sf, struct kernfs_root *root);
1786a7fed4eSTejun Heo
17980b9bbefSTejun Heo	int (*mkdir)(struct kernfs_node *parent, const char *name,
18080b9bbefSTejun Heo		     umode_t mode);
18180b9bbefSTejun Heo	int (*rmdir)(struct kernfs_node *kn);
18280b9bbefSTejun Heo	int (*rename)(struct kernfs_node *kn, struct kernfs_node *new_parent,
18380b9bbefSTejun Heo		      const char *new_name);
1844f41fc59SSerge E. Hallyn	int (*show_path)(struct seq_file *sf, struct kernfs_node *kn,
1854f41fc59SSerge E. Hallyn			 struct kernfs_root *root);
18680b9bbefSTejun Heo};
18780b9bbefSTejun Heo
188ba7443bcSTejun Heostruct kernfs_root {
189ba7443bcSTejun Heo	/* published fields */
190324a56e1STejun Heo	struct kernfs_node	*kn;
191d35258efSTejun Heo	unsigned int		flags;	/* KERNFS_ROOT_* flags */
192bc755553STejun Heo
193bc755553STejun Heo	/* private fields, do not use outside kernfs proper */
1947d35079fSShaohua Li	struct idr		ino_idr;
19540430452STejun Heo	u32			last_id_lowbits;
19640430452STejun Heo	u32			id_highbits;
19790c07c89STejun Heo	struct kernfs_syscall_ops *syscall_ops;
1987d568a83STejun Heo
1997ba0273bSIan Kent	/* list of kernfs_super_info of this root, protected by kernfs_rwsem */
2007d568a83STejun Heo	struct list_head	supers;
2017d568a83STejun Heo
202abd54f02STejun Heo	wait_queue_head_t	deactivate_waitq;
203393c3714SMinchan Kim	struct rw_semaphore	kernfs_rwsem;
204ba7443bcSTejun Heo};
205ba7443bcSTejun Heo
206c525aaddSTejun Heostruct kernfs_open_file {
207dd8a5b03STejun Heo	/* published fields */
208324a56e1STejun Heo	struct kernfs_node	*kn;
209dd8a5b03STejun Heo	struct file		*file;
2100e67db2fSTejun Heo	struct seq_file		*seq_file;
2112536390dSTejun Heo	void			*priv;
212dd8a5b03STejun Heo
213dd8a5b03STejun Heo	/* private fields, do not use outside kernfs proper */
214dd8a5b03STejun Heo	struct mutex		mutex;
215e4234a1fSChris Wilson	struct mutex		prealloc_mutex;
216dd8a5b03STejun Heo	int			event;
217dd8a5b03STejun Heo	struct list_head	list;
2182b75869bSNeilBrown	char			*prealloc_buf;
219dd8a5b03STejun Heo
220b7ce40cfSTejun Heo	size_t			atomic_write_len;
221a1d82affSTejun Heo	bool			mmapped:1;
2220e67db2fSTejun Heo	bool			released:1;
223dd8a5b03STejun Heo	const struct vm_operations_struct *vm_ops;
224dd8a5b03STejun Heo};
225dd8a5b03STejun Heo
226f6acf8bbSTejun Heostruct kernfs_ops {
2270e67db2fSTejun Heo	/*
2280e67db2fSTejun Heo	 * Optional open/release methods.  Both are called with
2290e67db2fSTejun Heo	 * @of->seq_file populated.
2300e67db2fSTejun Heo	 */
2310e67db2fSTejun Heo	int (*open)(struct kernfs_open_file *of);
2320e67db2fSTejun Heo	void (*release)(struct kernfs_open_file *of);
2330e67db2fSTejun Heo
234f6acf8bbSTejun Heo	/*
235f6acf8bbSTejun Heo	 * Read is handled by either seq_file or raw_read().
236f6acf8bbSTejun Heo	 *
237d19b9846STejun Heo	 * If seq_show() is present, seq_file path is active.  Other seq
238d19b9846STejun Heo	 * operations are optional and if not implemented, the behavior is
239d19b9846STejun Heo	 * equivalent to single_open().  @sf->private points to the
240c525aaddSTejun Heo	 * associated kernfs_open_file.
241f6acf8bbSTejun Heo	 *
242f6acf8bbSTejun Heo	 * read() is bounced through kernel buffer and a read larger than
243f6acf8bbSTejun Heo	 * PAGE_SIZE results in partial operation of PAGE_SIZE.
244f6acf8bbSTejun Heo	 */
245f6acf8bbSTejun Heo	int (*seq_show)(struct seq_file *sf, void *v);
246d19b9846STejun Heo
247d19b9846STejun Heo	void *(*seq_start)(struct seq_file *sf, loff_t *ppos);
248d19b9846STejun Heo	void *(*seq_next)(struct seq_file *sf, void *v, loff_t *ppos);
249d19b9846STejun Heo	void (*seq_stop)(struct seq_file *sf, void *v);
250f6acf8bbSTejun Heo
251c525aaddSTejun Heo	ssize_t (*read)(struct kernfs_open_file *of, char *buf, size_t bytes,
252f6acf8bbSTejun Heo			loff_t off);
253f6acf8bbSTejun Heo
254f6acf8bbSTejun Heo	/*
2554d3773c4STejun Heo	 * write() is bounced through kernel buffer.  If atomic_write_len
2564d3773c4STejun Heo	 * is not set, a write larger than PAGE_SIZE results in partial
2574d3773c4STejun Heo	 * operations of PAGE_SIZE chunks.  If atomic_write_len is set,
2584d3773c4STejun Heo	 * writes upto the specified size are executed atomically but
2594d3773c4STejun Heo	 * larger ones are rejected with -E2BIG.
260f6acf8bbSTejun Heo	 */
2614d3773c4STejun Heo	size_t atomic_write_len;
2622b75869bSNeilBrown	/*
2632b75869bSNeilBrown	 * "prealloc" causes a buffer to be allocated at open for
2642b75869bSNeilBrown	 * all read/write requests.  As ->seq_show uses seq_read()
2652b75869bSNeilBrown	 * which does its own allocation, it is incompatible with
2662b75869bSNeilBrown	 * ->prealloc.  Provide ->read and ->write with ->prealloc.
2672b75869bSNeilBrown	 */
2682b75869bSNeilBrown	bool prealloc;
269c525aaddSTejun Heo	ssize_t (*write)(struct kernfs_open_file *of, char *buf, size_t bytes,
270f6acf8bbSTejun Heo			 loff_t off);
271f6acf8bbSTejun Heo
272147e1a97SJohannes Weiner	__poll_t (*poll)(struct kernfs_open_file *of,
273147e1a97SJohannes Weiner			 struct poll_table_struct *pt);
274147e1a97SJohannes Weiner
275c525aaddSTejun Heo	int (*mmap)(struct kernfs_open_file *of, struct vm_area_struct *vma);
276f6acf8bbSTejun Heo};
277f6acf8bbSTejun Heo
/*
 * The kernfs superblock creation/mount parameter context.
 */
struct kernfs_fs_context {
	struct kernfs_root	*root;		/* Root of the hierarchy being mounted */
	void			*ns_tag;	/* Namespace tag of the mount (or NULL) */
	unsigned long		magic;		/* File system specific magic number */

	/* The following are set/used by kernfs_mount() */
	bool			new_sb_created;	/* Set to T if we allocated a new sb */
};
28923bf1b6bSDavid Howells
290ba341d55STejun Heo#ifdef CONFIG_KERNFS
291879f40d1STejun Heo
292df23fc39STejun Heostatic inline enum kernfs_node_type kernfs_type(struct kernfs_node *kn)
293cf9e5a73STejun Heo{
294df23fc39STejun Heo	return kn->flags & KERNFS_TYPE_MASK;
295cf9e5a73STejun Heo}