1/*
2  FUSE: Filesystem in Userspace
3  Copyright (C) 2001-2008  Miklos Szeredi <miklos@szeredi.hu>
4
5  This program can be distributed under the terms of the GNU GPL.
6  See the file COPYING.
7*/
8
9#ifndef _FS_FUSE_I_H
10#define _FS_FUSE_I_H
11
/*
 * Prepend "fuse: " to every format string that goes through pr_fmt(),
 * unless the including file has already defined its own pr_fmt.
 */
#ifndef pr_fmt
# define pr_fmt(fmt) "fuse: " fmt
#endif
15
16#include <linux/fuse.h>
17#include <linux/fs.h>
18#include <linux/mount.h>
19#include <linux/wait.h>
20#include <linux/list.h>
21#include <linux/spinlock.h>
22#include <linux/mm.h>
23#include <linux/backing-dev.h>
24#include <linux/mutex.h>
25#include <linux/rwsem.h>
26#include <linux/rbtree.h>
27#include <linux/poll.h>
28#include <linux/workqueue.h>
29#include <linux/kref.h>
30#include <linux/xattr.h>
31#include <linux/pid_namespace.h>
32#include <linux/refcount.h>
33#include <linux/user_namespace.h>
34
/** Default max number of pages that can be used in a single read request */
#define FUSE_DEFAULT_MAX_PAGES_PER_REQ 32

/** Upper bound on the max_pages value received in init_out */
#define FUSE_MAX_MAX_PAGES 256

/** Bias for fi->writectr, meaning new writepages must not be sent */
#define FUSE_NOWRITE INT_MIN

/** Maximum name length.  It could be as large as PATH_MAX, but would
 * that have any uses? */
#define FUSE_NAME_MAX 1024

/** Number of dentries for each connection in the control filesystem
 * (sizes the fuse_conn::ctl_dentry array) */
#define FUSE_CTL_NUM_DENTRIES 5

/** List of active connections */
extern struct list_head fuse_conn_list;

/** Global mutex protecting fuse_conn_list and the control filesystem */
extern struct mutex fuse_mutex;

/** Module parameters.
 * NOTE(review): presumably user-facing caps for fc->max_background and
 * fc->congestion_threshold; confirm against the defining .c file. */
extern unsigned max_user_bgreq;
extern unsigned max_user_congthresh;
59
/* One forget request; entries are chained into a singly linked list */
struct fuse_forget_link {
	/* Payload of the forget (type declared outside this header) */
	struct fuse_forget_one forget_one;
	/* Next entry in the list */
	struct fuse_forget_link *next;
};
65
/* Submount lookup tracking (referenced from fuse_inode::submount_lookup) */
struct fuse_submount_lookup {
	/** Refcount */
	refcount_t count;

	/** Unique ID, which identifies the inode between userspace
	 * and kernel */
	u64 nodeid;

	/** The request used for sending the FORGET message */
	struct fuse_forget_link *forget;
};
78
/** Container for data related to mapping to backing file */
struct fuse_backing {
	/** The backing file */
	struct file *file;
	/** Credentials kept alongside the backing file
	 * (NOTE(review): presumably used when accessing it; confirm) */
	struct cred *cred;

	/** refcount */
	refcount_t count;
	/** RCU head (NOTE(review): presumably for deferred freeing; confirm) */
	struct rcu_head rcu;
};
88
/**
 * FUSE inode
 *
 * Embeds the VFS inode as its first member; get_fuse_inode() recovers the
 * containing fuse_inode from a struct inode pointer.
 */
struct fuse_inode {
	/** Inode data */
	struct inode inode;

	/** Unique ID, which identifies the inode between userspace
	 * and kernel */
	u64 nodeid;

	/** Number of lookups on this inode */
	u64 nlookup;

	/** The request used for sending the FORGET message */
	struct fuse_forget_link *forget;

	/** Time in jiffies until the file attributes are valid */
	u64 i_time;

	/* Which attributes are invalid */
	u32 inval_mask;

	/** The sticky bit in inode->i_mode may have been removed, so
	    preserve the original mode */
	umode_t orig_i_mode;

	/* Cache birthtime */
	struct timespec64 i_btime;

	/** 64 bit inode number */
	u64 orig_ino;

	/** Version of last attribute change */
	u64 attr_version;

	/*
	 * The two members below share storage: the first is used for
	 * regular files, the second (rdc) for directories.
	 */
	union {
		/* read/write io cache (regular file only) */
		struct {
			/* Files usable in writepage.  Protected by fi->lock */
			struct list_head write_files;

			/* Writepages pending on truncate or fsync */
			struct list_head queued_writes;

			/* Number of sent writes, a negative bias
			 * (FUSE_NOWRITE) means more writes are blocked */
			int writectr;

			/** Number of files/maps using page cache */
			int iocachectr;

			/* Waitq for writepage completion */
			wait_queue_head_t page_waitq;

			/* waitq for direct-io completion */
			wait_queue_head_t direct_io_waitq;

			/* List of writepage requests (pending or sent) */
			struct rb_root writepages;
		};

		/* readdir cache (directory only) */
		struct {
			/* true if fully cached */
			bool cached;

			/* size of cache */
			loff_t size;

			/* position at end of cache (position of next entry) */
			loff_t pos;

			/* version of the cache */
			u64 version;

			/* modification time of directory when cache was
			 * started */
			struct timespec64 mtime;

			/* iversion of directory when cache was started */
			u64 iversion;

			/* protects above fields */
			spinlock_t lock;
		} rdc;
	};

	/** Miscellaneous bits describing inode state (FUSE_I_* bit numbers) */
	unsigned long state;

	/** Lock for serializing lookup and readdir for back compatibility */
	struct mutex mutex;

	/** Lock to protect write related fields */
	spinlock_t lock;

#ifdef CONFIG_FUSE_DAX
	/*
	 * Dax specific inode data
	 */
	struct fuse_inode_dax *dax;
#endif
	/** Submount specific lookup tracking */
	struct fuse_submount_lookup *submount_lookup;
#ifdef CONFIG_FUSE_PASSTHROUGH
	/** Reference to backing file in passthrough mode */
	struct fuse_backing *fb;
#endif
};
197
/**
 * FUSE inode state bits
 *
 * These are bit numbers (not masks) for fuse_inode::state; see
 * fuse_make_bad()/fuse_is_bad(), which use set_bit()/test_bit() on them.
 */
enum {
	/** Advise readdirplus  */
	FUSE_I_ADVISE_RDPLUS,
	/** Initialized with readdirplus */
	FUSE_I_INIT_RDPLUS,
	/** An operation changing file size is in progress  */
	FUSE_I_SIZE_UNSTABLE,
	/* Bad inode */
	FUSE_I_BAD,
	/* Has btime */
	FUSE_I_BTIME,
	/* Wants or already has page cache IO */
	FUSE_I_CACHE_IO_MODE,
};
213
/* Forward declarations; the full definitions appear later in this header */
struct fuse_conn;
struct fuse_mount;
union fuse_file_args;

/** FUSE specific file data */
struct fuse_file {
	/** Fuse mount this file belongs to */
	struct fuse_mount *fm;

	/* Argument space reserved for open/release */
	union fuse_file_args *args;

	/** Kernel file handle guaranteed to be unique */
	u64 kh;

	/** File handle used by userspace */
	u64 fh;

	/** Node id of this file */
	u64 nodeid;

	/** Refcount */
	refcount_t count;

	/** FOPEN_* flags returned by open */
	u32 open_flags;

	/** Entry on inode's write_files list */
	struct list_head write_entry;

	/* Readdir related */
	struct {
		/* Dir stream position */
		loff_t pos;

		/* Offset in cache */
		loff_t cache_off;

		/* Version of cache we are reading */
		u64 version;

	} readdir;

	/** RB node to be linked on fuse_conn->polled_files */
	struct rb_node polled_node;

	/** Wait queue head for poll */
	wait_queue_head_t poll_wait;

	/** Does file hold a fi->iocachectr refcount? */
	enum { IOM_NONE, IOM_CACHED, IOM_UNCACHED } iomode;

#ifdef CONFIG_FUSE_PASSTHROUGH
	/** Reference to backing file in passthrough mode */
	struct file *passthrough;
	/** Credentials stored with the passthrough file
	 * (NOTE(review): presumably those of the backing file; confirm) */
	const struct cred *cred;
#endif

	/** Has flock been performed on this file? */
	bool flock:1;
};
275
/** One input argument of a request: a read-only buffer and its size */
struct fuse_in_arg {
	unsigned size;
	const void *value;
};
281
/** One output argument of a request: a writable buffer and its size */
struct fuse_arg {
	unsigned size;
	void *value;
};
287
/**
 * FUSE page descriptor
 *
 * fuse_page_descs_length_init() sets length to PAGE_SIZE - offset, i.e. the
 * two fields describe a byte range within one page.
 */
struct fuse_page_desc {
	unsigned int length;
	unsigned int offset;
};
293
/**
 * Arguments of one request: up to three input and two output buffers,
 * plus per-request flags and a completion callback.
 *
 * NOTE(review): the exact meaning of each flag bit is defined by the code
 * that sends requests, which is not part of this header.
 */
struct fuse_args {
	/* Node id the request refers to */
	uint64_t nodeid;
	/* Request opcode */
	uint32_t opcode;
	/* Presumably the number of in_args[] / out_args[] entries in use
	 * -- confirm against the request-sending code */
	uint8_t in_numargs;
	uint8_t out_numargs;
	uint8_t ext_idx;
	/* Per-request flag bits */
	bool force:1;
	bool noreply:1;
	bool nocreds:1;
	bool in_pages:1;
	bool out_pages:1;
	bool user_pages:1;
	bool out_argvar:1;
	bool page_zeroing:1;
	bool page_replace:1;
	bool may_block:1;
	bool is_ext:1;
	bool is_pinned:1;
	/* Input and output argument slots */
	struct fuse_in_arg in_args[3];
	struct fuse_arg out_args[2];
	/* Callback taking the mount, these args and an error code */
	void (*end)(struct fuse_mount *fm, struct fuse_args *args, int error);
};
316
/**
 * Request arguments that additionally carry an array of pages.
 * fuse_pages_alloc() allocates pages[] and descs[] as one buffer.
 */
struct fuse_args_pages {
	struct fuse_args args;
	/* Page pointer array and the matching per-page descriptors */
	struct page **pages;
	struct fuse_page_desc *descs;
	/* NOTE(review): presumably the number of pages[] entries in use */
	unsigned int num_pages;
};
323
/** Argument storage for a release request (see union fuse_file_args) */
struct fuse_release_args {
	struct fuse_args args;
	struct fuse_release_in inarg;
	/* Inode associated with the release
	 * (NOTE(review): reference ownership is not visible here) */
	struct inode *inode;
};
329
/**
 * Argument space pointed to by fuse_file::args; the open reply and the
 * release arguments share the same storage.
 */
union fuse_file_args {
	/* Used during open() */
	struct fuse_open_out open_outarg;
	/* Used during release() */
	struct fuse_release_args release_args;
};
336
/** Declare a struct fuse_args variable named @args, empty-initialized */
#define FUSE_ARGS(args) struct fuse_args args = {}
338
/**
 * The request IO state (for asynchronous processing)
 *
 * NOTE(review): the field notes below are inferred from names and types
 * only; confirm against the I/O code before relying on them.
 */
struct fuse_io_priv {
	/* Reference count (FUSE_IO_PRIV_SYNC starts it at 1) */
	struct kref refcnt;
	/* Non-zero for asynchronous operation (FUSE_IO_PRIV_SYNC sets 0) */
	int async;
	spinlock_t lock;
	unsigned reqs;
	ssize_t bytes;
	size_t size;
	__u64 offset;
	bool write;
	bool should_dirty;
	int err;
	/* The kiocb this I/O belongs to */
	struct kiocb *iocb;
	struct completion *done;
	bool blocking;
};
355
/**
 * Initializer for a synchronous struct fuse_io_priv.
 *
 * @i: the kiocb pointer to store in ->iocb
 *
 * Sets the refcount to 1 and async to 0; all remaining fields are
 * zero-initialized.  The argument is parenthesized so that any expression
 * can be passed safely.
 */
#define FUSE_IO_PRIV_SYNC(i)		\
{					\
	.refcnt = KREF_INIT(1),		\
	.async = 0,			\
	.iocb = (i),			\
}
362
/**
 * Request flags
 *
 * These are bit numbers for fuse_req::flags, which is updated with
 * test/set/clear_bit().
 *
 * FR_ISREPLY:		set if the request has reply
 * FR_FORCE:		force sending of the request even if interrupted
 * FR_BACKGROUND:	request is sent in the background
 * FR_WAITING:		request is counted as "waiting"
 * FR_ABORTED:		the request was aborted
 * FR_INTERRUPTED:	the request has been interrupted
 * FR_LOCKED:		data is being copied to/from the request
 * FR_PENDING:		request is not yet in userspace
 * FR_SENT:		request is in userspace, waiting for an answer
 * FR_FINISHED:		request is finished
 * FR_PRIVATE:		request is on private list
 * FR_ASYNC:		request is asynchronous
 */
enum fuse_req_flag {
	FR_ISREPLY,
	FR_FORCE,
	FR_BACKGROUND,
	FR_WAITING,
	FR_ABORTED,
	FR_INTERRUPTED,
	FR_LOCKED,
	FR_PENDING,
	FR_SENT,
	FR_FINISHED,
	FR_PRIVATE,
	FR_ASYNC,
};
393
/**
 * A request to the client
 *
 * .waitq.lock protects the following fields:
 *   - FR_ABORTED
 *   - FR_LOCKED (may also be modified under fc->lock, tested under both)
 */
struct fuse_req {
	/** This can be on either pending processing or io lists in
	    fuse_conn */
	struct list_head list;

	/** Entry on the interrupts list  */
	struct list_head intr_entry;

	/* Input/output arguments */
	struct fuse_args *args;

	/** refcount */
	refcount_t count;

	/* Request flags (enum fuse_req_flag bit numbers), updated with
	 * test/set/clear_bit() */
	unsigned long flags;

	/* The request input header */
	struct {
		struct fuse_in_header h;
	} in;

	/* The request output header */
	struct {
		struct fuse_out_header h;
	} out;

	/** Used to wake up the task waiting for completion of request */
	wait_queue_head_t waitq;

#if IS_ENABLED(CONFIG_VIRTIO_FS)
	/** virtio-fs's physically contiguous buffer for in and out args */
	void *argbuf;
#endif

	/** fuse_mount this request belongs to */
	struct fuse_mount *fm;
};
439
/* Forward declaration; defined further down in this header */
struct fuse_iqueue;

/**
 * Input queue callbacks
 *
 * Input queue signalling is device-specific.  For example, the /dev/fuse file
 * uses fiq->waitq and fasync to wake processes that are waiting on queue
 * readiness.  These callbacks allow other device types to respond to input
 * queue activity.
 *
 * The three wake_*_and_unlock callbacks are annotated __releases(fiq->lock):
 * they are entered with fiq->lock held and must drop it.
 */
struct fuse_iqueue_ops {
	/**
	 * Signal that a forget has been queued
	 */
	void (*wake_forget_and_unlock)(struct fuse_iqueue *fiq)
		__releases(fiq->lock);

	/**
	 * Signal that an INTERRUPT request has been queued
	 */
	void (*wake_interrupt_and_unlock)(struct fuse_iqueue *fiq)
		__releases(fiq->lock);

	/**
	 * Signal that a request has been queued
	 */
	void (*wake_pending_and_unlock)(struct fuse_iqueue *fiq)
		__releases(fiq->lock);

	/**
	 * Clean up when fuse_iqueue is destroyed
	 */
	void (*release)(struct fuse_iqueue *fiq);
};
474
/** Input queue operations used by the /dev/fuse device */
extern const struct fuse_iqueue_ops fuse_dev_fiq_ops;
477
/**
 * Input queue: requests, interrupts and forgets that have been queued
 * (embedded in fuse_conn as ->iq)
 */
struct fuse_iqueue {
	/** Connection established */
	unsigned connected;

	/** Lock protecting accesses to members of this structure */
	spinlock_t lock;

	/** Readers of the connection are waiting on this */
	wait_queue_head_t waitq;

	/** The next unique request id */
	u64 reqctr;

	/** The list of pending requests */
	struct list_head pending;

	/** Pending interrupts */
	struct list_head interrupts;

	/** Queue of pending forgets */
	struct fuse_forget_link forget_list_head;
	struct fuse_forget_link *forget_list_tail;

	/** Batching of FORGET requests (positive indicates FORGET batch) */
	int forget_batch;

	/** O_ASYNC requests */
	struct fasync_struct *fasync;

	/** Device-specific callbacks */
	const struct fuse_iqueue_ops *ops;

	/** Device-specific state */
	void *priv;
};
513
/* Size of the processing-queue hash table: 2^FUSE_PQ_HASH_BITS buckets */
#define FUSE_PQ_HASH_BITS 8
#define FUSE_PQ_HASH_SIZE (1 << FUSE_PQ_HASH_BITS)

/** Processing queue of a fuse_dev (embedded there as ->pq) */
struct fuse_pqueue {
	/** Connection established */
	unsigned connected;

	/** Lock protecting accesses to members of this structure */
	spinlock_t lock;

	/** Hash table of requests being processed
	 * (NOTE(review): presumably FUSE_PQ_HASH_SIZE list heads; confirm
	 * at the allocation site) */
	struct list_head *processing;

	/** The list of requests under I/O */
	struct list_head io;
};
530
/**
 * Fuse device instance
 *
 * Each instance has its own processing queue and is linked on the owning
 * connection's fc->devices list.
 */
struct fuse_dev {
	/** Fuse connection for this device */
	struct fuse_conn *fc;

	/** Processing queue */
	struct fuse_pqueue pq;

	/** list entry on fc->devices */
	struct list_head entry;
};
544
/** DAX mode selected for a mount */
enum fuse_dax_mode {
	FUSE_DAX_INODE_DEFAULT,	/* default */
	FUSE_DAX_ALWAYS,	/* "-o dax=always" */
	FUSE_DAX_NEVER,		/* "-o dax=never" */
	FUSE_DAX_INODE_USER,	/* "-o dax=inode" */
};

/**
 * Tell whether @mode is one of the two "inode" DAX modes
 * (FUSE_DAX_INODE_DEFAULT or FUSE_DAX_INODE_USER).
 */
static inline bool fuse_is_inode_dax_mode(enum fuse_dax_mode mode)
{
	switch (mode) {
	case FUSE_DAX_INODE_DEFAULT:
	case FUSE_DAX_INODE_USER:
		return true;
	default:
		return false;
	}
}
556
/**
 * Mount context handed to fuse_fill_super_common().
 *
 * NOTE(review): the *_present bits presumably record which mount options
 * were explicitly supplied; confirm against the option parser.
 */
struct fuse_fs_context {
	int fd;
	struct file *file;
	unsigned int rootmode;
	kuid_t user_id;
	kgid_t group_id;
	bool is_bdev:1;
	bool fd_present:1;
	bool rootmode_present:1;
	bool user_id_present:1;
	bool group_id_present:1;
	bool default_permissions:1;
	bool allow_other:1;
	bool destroy:1;
	bool no_control:1;
	bool no_force_umount:1;
	bool legacy_opts_show:1;
	enum fuse_dax_mode dax_mode;
	unsigned int max_read;
	unsigned int blksize;
	const char *subtype;

	/* DAX device, may be NULL */
	struct dax_device *dax_dev;

	/* fuse_dev pointer to fill in, should contain NULL on entry */
	void **fudptr;
};
585
/**
 * Counter plus wait queue; fuse_sync_bucket_dec() wakes ->waitq when
 * ->count drops to zero.  fuse_conn::curr_bucket points at the current one.
 */
struct fuse_sync_bucket {
	/* count is a possible scalability bottleneck */
	atomic_t count;
	wait_queue_head_t waitq;
	struct rcu_head rcu;
};
592
/**
 * A Fuse connection.
 *
 * This structure is created, when the root filesystem is mounted, and
 * is destroyed, when the client device is closed and the last
 * fuse_mount is destroyed.
 */
struct fuse_conn {
	/** Lock protecting accesses to members of this structure */
	spinlock_t lock;

	/** Refcount */
	refcount_t count;

	/** Number of fuse_dev's */
	atomic_t dev_count;

	struct rcu_head rcu;

	/** The user id for this mount */
	kuid_t user_id;

	/** The group id for this mount */
	kgid_t group_id;

	/** The pid namespace for this mount */
	struct pid_namespace *pid_ns;

	/** The user namespace for this mount */
	struct user_namespace *user_ns;

	/** Maximum read size */
	unsigned max_read;

	/** Maximum write size */
	unsigned max_write;

	/** Maximum number of pages that can be used in a single request */
	unsigned int max_pages;

	/** Constrain ->max_pages to this value during feature negotiation */
	unsigned int max_pages_limit;

	/** Input queue */
	struct fuse_iqueue iq;

	/** The next unique kernel file handle */
	atomic64_t khctr;

	/** rbtree of fuse_files waiting for poll events indexed by ph
	 * (NOTE(review): "ph" is presumably the poll handle; confirm) */
	struct rb_root polled_files;

	/** Maximum number of outstanding background requests */
	unsigned max_background;

	/** Number of background requests at which congestion starts */
	unsigned congestion_threshold;

	/** Number of requests currently in the background */
	unsigned num_background;

	/** Number of background requests currently queued for userspace */
	unsigned active_background;

	/** The list of background requests set aside for later queuing */
	struct list_head bg_queue;

	/** Protects: max_background, congestion_threshold, num_background,
	 * active_background, bg_queue, blocked */
	spinlock_t bg_lock;

	/** Flag indicating that INIT reply has been received. Allocating
	 * any fuse request will be suspended until the flag is set */
	int initialized;

	/** Flag indicating if connection is blocked.  This will be
	    the case before the INIT reply is received, and if there
	    are too many outstanding background requests */
	int blocked;

	/** waitq for blocked connection */
	wait_queue_head_t blocked_waitq;

	/** Connection established, cleared on umount, connection
	    abort and device release */
	unsigned connected;

	/** Connection aborted via sysfs */
	bool aborted;

	/** Connection failed (version mismatch).  Cannot race with
	    setting other bitfields since it is only set once in INIT
	    reply, before any other request, and never cleared */
	unsigned conn_error:1;

	/** Connection successful.  Only set in INIT */
	unsigned conn_init:1;

	/** Do readahead asynchronously?  Only set in INIT */
	unsigned async_read:1;

	/** Return a unique read error after abort.  Only set in INIT */
	unsigned abort_err:1;

	/** Do not send separate SETATTR request before open(O_TRUNC)  */
	unsigned atomic_o_trunc:1;

	/** Filesystem supports NFS exporting.  Only set in INIT */
	unsigned export_support:1;

	/** write-back cache policy (default is write-through) */
	unsigned writeback_cache:1;

	/** allow parallel lookups and readdir (default is serialized) */
	unsigned parallel_dirops:1;

	/** fs handles killing suid/sgid/cap on write/chown/trunc */
	unsigned handle_killpriv:1;

	/** cache READLINK responses in page cache */
	unsigned cache_symlinks:1;

	/* show legacy mount options */
	unsigned int legacy_opts_show:1;

	/*
	 * fs kills suid/sgid/cap on write/chown/trunc. suid is killed on
	 * write/trunc only if caller did not have CAP_FSETID.  sgid is killed
	 * on write/truncate only if caller did not have CAP_FSETID as well as
	 * file has group execute permission.
	 */
	unsigned handle_killpriv_v2:1;

	/*
	 * The following bitfields are only for optimization purposes
	 * and hence races in setting them will not cause malfunction
	 */

	/** Is open/release not implemented by fs? */
	unsigned no_open:1;

	/** Is opendir/releasedir not implemented by fs? */
	unsigned no_opendir:1;

	/** Is fsync not implemented by fs? */
	unsigned no_fsync:1;

	/** Is fsyncdir not implemented by fs? */
	unsigned no_fsyncdir:1;

	/** Is flush not implemented by fs? */
	unsigned no_flush:1;

	/** Is setxattr not implemented by fs? */
	unsigned no_setxattr:1;

	/** Does file server support extended setxattr */
	unsigned setxattr_ext:1;

	/** Is getxattr not implemented by fs? */
	unsigned no_getxattr:1;

	/** Is listxattr not implemented by fs? */
	unsigned no_listxattr:1;

	/** Is removexattr not implemented by fs? */
	unsigned no_removexattr:1;

	/** Are posix file locking primitives not implemented by fs? */
	unsigned no_lock:1;

	/** Is access not implemented by fs? */
	unsigned no_access:1;

	/** Is create not implemented by fs? */
	unsigned no_create:1;

	/** Is interrupt not implemented by fs? */
	unsigned no_interrupt:1;

	/** Is bmap not implemented by fs? */
	unsigned no_bmap:1;

	/** Is poll not implemented by fs? */
	unsigned no_poll:1;

	/** Do multi-page cached writes */
	unsigned big_writes:1;

	/** Don't apply umask to creation modes */
	unsigned dont_mask:1;

	/** Are BSD file locking primitives not implemented by fs? */
	unsigned no_flock:1;

	/** Is fallocate not implemented by fs? */
	unsigned no_fallocate:1;

	/** Is rename with flags not implemented by fs? */
	unsigned no_rename2:1;

	/** Use enhanced/automatic page cache invalidation. */
	unsigned auto_inval_data:1;

	/** Filesystem is fully responsible for page cache invalidation. */
	unsigned explicit_inval_data:1;

	/** Does the filesystem support readdirplus? */
	unsigned do_readdirplus:1;

	/** Does the filesystem want adaptive readdirplus? */
	unsigned readdirplus_auto:1;

	/** Does the filesystem support asynchronous direct-IO submission? */
	unsigned async_dio:1;

	/** Is lseek not implemented by fs? */
	unsigned no_lseek:1;

	/** Does the filesystem support posix acls? */
	unsigned posix_acl:1;

	/** Check permissions based on the file mode or not? */
	unsigned default_permissions:1;

	/** Allow other than the mounter user to access the filesystem ? */
	unsigned allow_other:1;

	/** Is copy_file_range not implemented by fs? */
	unsigned no_copy_file_range:1;

	/* Send DESTROY request */
	unsigned int destroy:1;

	/* Delete dentries that have gone stale */
	unsigned int delete_stale:1;

	/** Do not create entry in fusectl fs */
	unsigned int no_control:1;

	/** Do not allow MNT_FORCE umount */
	unsigned int no_force_umount:1;

	/* Auto-mount submounts announced by the server */
	unsigned int auto_submounts:1;

	/* Propagate syncfs() to server */
	unsigned int sync_fs:1;

	/* Initialize security xattrs when creating a new inode */
	unsigned int init_security:1;

	/* Add supplementary group info when creating a new inode */
	unsigned int create_supp_group:1;

	/* Does the filesystem support per inode DAX? */
	unsigned int inode_dax:1;

	/* Is tmpfile not implemented by fs? */
	unsigned int no_tmpfile:1;

	/* Relax restrictions to allow shared mmap in FOPEN_DIRECT_IO mode */
	unsigned int direct_io_allow_mmap:1;

	/* Is statx not implemented by fs? */
	unsigned int no_statx:1;

	/** Passthrough support for read/write IO */
	unsigned int passthrough:1;

	/** Maximum stack depth for passthrough backing files */
	int max_stack_depth;

	/** The number of requests waiting for completion */
	atomic_t num_waiting;

	/** Negotiated minor version */
	unsigned minor;

	/** Entry on the global list of connections
	 * (NOTE(review): the original comment named "fuse_mount_list";
	 * presumably this is fuse_conn_list -- confirm) */
	struct list_head entry;

	/** Device ID from the root super block */
	dev_t dev;

	/** Dentries in the control filesystem */
	struct dentry *ctl_dentry[FUSE_CTL_NUM_DENTRIES];

	/** number of dentries used in the above array */
	int ctl_ndents;

	/** Key for lock owner ID scrambling */
	u32 scramble_key[4];

	/** Version counter for attribute changes */
	atomic64_t attr_version;

	/** Called on final put */
	void (*release)(struct fuse_conn *);

	/**
	 * Read/write semaphore to hold when accessing the sb of any
	 * fuse_mount belonging to this connection
	 */
	struct rw_semaphore killsb;

	/** List of device instances belonging to this connection */
	struct list_head devices;

#ifdef CONFIG_FUSE_DAX
	/* Dax mode */
	enum fuse_dax_mode dax_mode;

	/* Dax specific conn data, non-NULL if DAX is enabled */
	struct fuse_conn_dax *dax;
#endif

	/** List of filesystems using this connection */
	struct list_head mounts;

	/* New writepages go into this bucket */
	struct fuse_sync_bucket __rcu *curr_bucket;

#ifdef CONFIG_FUSE_PASSTHROUGH
	/** IDR for backing files ids */
	struct idr backing_files_map;
#endif
};
921
/*
 * Represents a mounted filesystem, potentially a submount.
 *
 * This object allows sharing a fuse_conn between separate mounts to
 * allow submounts with dedicated superblocks and thus separate device
 * IDs.
 *
 * A super_block's s_fs_info points at its fuse_mount (see
 * get_fuse_mount_super()).
 */
struct fuse_mount {
	/* Underlying (potentially shared) connection to the FUSE server */
	struct fuse_conn *fc;

	/*
	 * Super block for this connection (fc->killsb must be held when
	 * accessing this).
	 */
	struct super_block *sb;

	/* Entry on fc->mounts */
	struct list_head fc_entry;
	/* RCU head (NOTE(review): presumably for deferred freeing; confirm) */
	struct rcu_head rcu;
};
943
944static inline struct fuse_mount *get_fuse_mount_super(struct super_block *sb)
945{
946	return sb->s_fs_info;
947}
948
949static inline struct fuse_conn *get_fuse_conn_super(struct super_block *sb)
950{
951	return get_fuse_mount_super(sb)->fc;
952}
953
954static inline struct fuse_mount *get_fuse_mount(struct inode *inode)
955{
956	return get_fuse_mount_super(inode->i_sb);
957}
958
959static inline struct fuse_conn *get_fuse_conn(struct inode *inode)
960{
961	return get_fuse_mount_super(inode->i_sb)->fc;
962}
963
964static inline struct fuse_inode *get_fuse_inode(struct inode *inode)
965{
966	return container_of(inode, struct fuse_inode, inode);
967}
968
969static inline u64 get_node_id(struct inode *inode)
970{
971	return get_fuse_inode(inode)->nodeid;
972}
973
974static inline int invalid_nodeid(u64 nodeid)
975{
976	return !nodeid || nodeid == FUSE_ROOT_ID;
977}
978
979static inline u64 fuse_get_attr_version(struct fuse_conn *fc)
980{
981	return atomic64_read(&fc->attr_version);
982}
983
984static inline bool fuse_stale_inode(const struct inode *inode, int generation,
985				    struct fuse_attr *attr)
986{
987	return inode->i_generation != generation ||
988		inode_wrong_type(inode, attr->mode);
989}
990
991static inline void fuse_make_bad(struct inode *inode)
992{
993	set_bit(FUSE_I_BAD, &get_fuse_inode(inode)->state);
994}
995
996static inline bool fuse_is_bad(struct inode *inode)
997{
998	return unlikely(test_bit(FUSE_I_BAD, &get_fuse_inode(inode)->state));
999}
1000
1001static inline struct page **fuse_pages_alloc(unsigned int npages, gfp_t flags,
1002					     struct fuse_page_desc **desc)
1003{
1004	struct page **pages;
1005
1006	pages = kzalloc(npages * (sizeof(struct page *) +
1007				  sizeof(struct fuse_page_desc)), flags);
1008	*desc = (void *) (pages + npages);
1009
1010	return pages;
1011}
1012
1013static inline void fuse_page_descs_length_init(struct fuse_page_desc *descs,
1014					       unsigned int index,
1015					       unsigned int nr_pages)
1016{
1017	int i;
1018
1019	for (i = index; i < index + nr_pages; i++)
1020		descs[i].length = PAGE_SIZE - descs[i].offset;
1021}
1022
/**
 * Drop one count from @bucket and wake up its wait queue when the count
 * reaches zero.  Both steps run inside an RCU read-side critical section.
 */
static inline void fuse_sync_bucket_dec(struct fuse_sync_bucket *bucket)
{
	/* Need RCU protection to prevent use after free after the decrement */
	rcu_read_lock();
	if (atomic_dec_and_test(&bucket->count))
		wake_up(&bucket->waitq);
	rcu_read_unlock();
}
1031
/** Device operations */
extern const struct file_operations fuse_dev_operations;

/** Dentry operations; the root dentry has its own table */
extern const struct dentry_operations fuse_dentry_operations;
extern const struct dentry_operations fuse_root_dentry_operations;
1037
/**
 * Get a filled in inode
 */
struct inode *fuse_iget(struct super_block *sb, u64 nodeid,
			int generation, struct fuse_attr *attr,
			u64 attr_valid, u64 attr_version);

/**
 * Look up @name under the node @nodeid; results are passed back through
 * @outarg and @inode.
 */
int fuse_lookup_name(struct super_block *sb, u64 nodeid, const struct qstr *name,
		     struct fuse_entry_out *outarg, struct inode **inode);

/**
 * Send FORGET command
 */
void fuse_queue_forget(struct fuse_conn *fc, struct fuse_forget_link *forget,
		       u64 nodeid, u64 nlookup);

/** Allocate a forget link */
struct fuse_forget_link *fuse_alloc_forget(void);

/**
 * Dequeue forgets from @fiq.
 * NOTE(review): presumably takes at most @max entries and reports the
 * number taken through @countp; confirm against the definition.
 */
struct fuse_forget_link *fuse_dequeue_forget(struct fuse_iqueue *fiq,
					     unsigned int max,
					     unsigned int *countp);
1059
/*
 * Arguments of a page-based read or write request.  The read and write
 * specific headers share storage in the anonymous union.
 */
struct fuse_io_args {
	union {
		struct {
			struct fuse_read_in in;
			u64 attr_ver;
		} read;
		struct {
			struct fuse_write_in in;
			struct fuse_write_out out;
			bool page_locked;
		} write;
	};
	struct fuse_args_pages ap;
	struct fuse_io_priv *io;
	struct fuse_file *ff;
};

/*
 * Initialize READ or READDIR request
 */
void fuse_read_args_fill(struct fuse_io_args *ia, struct file *file, loff_t pos,
			 size_t count, int opcode);
1082
1083
/*
 * Allocate and free a struct fuse_file.
 * NOTE(review): @release presumably controls whether argument space for
 * the release request is reserved (fuse_file::args); confirm.
 */
struct fuse_file *fuse_file_alloc(struct fuse_mount *fm, bool release);
void fuse_file_free(struct fuse_file *ff);
int fuse_finish_open(struct inode *inode, struct file *file);

void fuse_sync_release(struct fuse_inode *fi, struct fuse_file *ff,
		       unsigned int flags);

/**
 * Send RELEASE or RELEASEDIR request
 */
void fuse_release_common(struct file *file, bool isdir);

/**
 * Send FSYNC or FSYNCDIR request
 */
int fuse_fsync_common(struct file *file, loff_t start, loff_t end,
		      int datasync, int opcode);

/**
 * Notify poll wakeup
 */
int fuse_notify_poll_wakeup(struct fuse_conn *fc,
			    struct fuse_notify_poll_wakeup_out *outarg);
1107
/**
 * Initialize file operations on a regular file
 */
void fuse_init_file_inode(struct inode *inode, unsigned int flags);

/**
 * Initialize inode operations on regular files and special files
 */
void fuse_init_common(struct inode *inode);

/**
 * Initialize inode and file operations on a directory
 */
void fuse_init_dir(struct inode *inode);

/**
 * Initialize inode operations on a symlink
 */
void fuse_init_symlink(struct inode *inode);

/**
 * Change attributes of an inode
 */
void fuse_change_attributes(struct inode *inode, struct fuse_attr *attr,
			    struct fuse_statx *sx,
			    u64 attr_valid, u64 attr_version);

/**
 * Variant of fuse_change_attributes() taking a cache mask instead of an
 * attribute version.
 * NOTE(review): relationship between the two inferred from the names and
 * signatures only; confirm against the definitions.
 */
void fuse_change_attributes_common(struct inode *inode, struct fuse_attr *attr,
				   struct fuse_statx *sx,
				   u64 attr_valid, u32 cache_mask);

/** Return the cache mask for @inode */
u32 fuse_get_cache_mask(struct inode *inode);
1140
/**
 * Initialize the client device
 */
int fuse_dev_init(void);

/**
 * Cleanup the client device
 */
void fuse_dev_cleanup(void);

/** Set up and tear down the control filesystem */
int fuse_ctl_init(void);
void __exit fuse_ctl_cleanup(void);

/**
 * Simple request sending that does request allocation and freeing
 *
 * fuse_simple_background() is the background variant and takes the
 * allocation flags to use in @gfp_flags.
 */
ssize_t fuse_simple_request(struct fuse_mount *fm, struct fuse_args *args);
int fuse_simple_background(struct fuse_mount *fm, struct fuse_args *args,
			   gfp_t gfp_flags);

/**
 * End a finished request
 */
void fuse_request_end(struct fuse_req *req);

/* Abort all requests */
void fuse_abort_conn(struct fuse_conn *fc);
void fuse_wait_aborted(struct fuse_conn *fc);
1169
/**
 * Invalidate inode attributes
 */

/* Attributes possibly changed on data modification */
#define FUSE_STATX_MODIFY	(STATX_MTIME | STATX_CTIME | STATX_BLOCKS)

/* Attributes possibly changed on data and/or size modification */
#define FUSE_STATX_MODSIZE	(FUSE_STATX_MODIFY | STATX_SIZE)

/* Invalidate all attributes, or only those selected by @mask */
void fuse_invalidate_attr(struct inode *inode);
void fuse_invalidate_attr_mask(struct inode *inode, u32 mask);

/* Invalidate the cached state of a dentry */
void fuse_invalidate_entry_cache(struct dentry *entry);

/* Invalidate the access time of an inode */
void fuse_invalidate_atime(struct inode *inode);

/* Convert a seconds/nanoseconds validity period to a jiffies value */
u64 fuse_time_to_jiffies(u64 sec, u32 nsec);
/* Attribute timeout of a reply @o, from its attr_valid{,_nsec} fields */
#define ATTR_TIMEOUT(o) \
	fuse_time_to_jiffies((o)->attr_valid, (o)->attr_valid_nsec)

/* Update the timeout of @entry from the entry reply @o */
void fuse_change_entry_timeout(struct dentry *entry, struct fuse_entry_out *o);
1192
1193/**
1194 * Acquire reference to fuse_conn
1195 */
1196struct fuse_conn *fuse_conn_get(struct fuse_conn *fc);
1197
1198/**
1199 * Initialize fuse_conn
1200 */
1201void fuse_conn_init(struct fuse_conn *fc, struct fuse_mount *fm,
1202		    struct user_namespace *user_ns,
1203		    const struct fuse_iqueue_ops *fiq_ops, void *fiq_priv);
1204
1205/**
1206 * Release reference to fuse_conn
1207 */
1208void fuse_conn_put(struct fuse_conn *fc);
1209
1210struct fuse_dev *fuse_dev_alloc_install(struct fuse_conn *fc);
1211struct fuse_dev *fuse_dev_alloc(void);
1212void fuse_dev_install(struct fuse_dev *fud, struct fuse_conn *fc);
1213void fuse_dev_free(struct fuse_dev *fud);
1214void fuse_send_init(struct fuse_mount *fm);
1215
1216/**
1217 * Fill in superblock and initialize fuse connection
1218 * @sb: partially-initialized superblock to fill in
1219 * @ctx: mount context
1220 */
1221int fuse_fill_super_common(struct super_block *sb, struct fuse_fs_context *ctx);
1222
1223/*
1224 * Remove the mount from the connection
1225 *
1226 * Returns whether this was the last mount
1227 */
1228bool fuse_mount_remove(struct fuse_mount *fm);
1229
1230/*
1231 * Setup context ops for submounts
1232 */
1233int fuse_init_fs_context_submount(struct fs_context *fsc);
1234
1235/*
1236 * Shut down the connection (possibly sending DESTROY request).
1237 */
1238void fuse_conn_destroy(struct fuse_mount *fm);
1239
1240/* Drop the connection and free the fuse mount */
1241void fuse_mount_destroy(struct fuse_mount *fm);
1242
1243/**
1244 * Add connection to control filesystem
1245 */
1246int fuse_ctl_add_conn(struct fuse_conn *fc);
1247
1248/**
1249 * Remove connection from control filesystem
1250 */
1251void fuse_ctl_remove_conn(struct fuse_conn *fc);
1252
1253/**
1254 * Is file type valid?
1255 */
1256int fuse_valid_type(int m);
1257
1258bool fuse_invalid_attr(struct fuse_attr *attr);
1259
1260/**
1261 * Is current process allowed to perform filesystem operation?
1262 */
1263bool fuse_allow_current_process(struct fuse_conn *fc);
1264
1265u64 fuse_lock_owner_id(struct fuse_conn *fc, fl_owner_t id);
1266
1267void fuse_flush_time_update(struct inode *inode);
1268void fuse_update_ctime(struct inode *inode);
1269
1270int fuse_update_attributes(struct inode *inode, struct file *file, u32 mask);
1271
1272void fuse_flush_writepages(struct inode *inode);
1273
1274void fuse_set_nowrite(struct inode *inode);
1275void fuse_release_nowrite(struct inode *inode);
1276
1277/**
1278 * Scan all fuse_mounts belonging to fc to find the first where
1279 * ilookup5() returns a result.  Return that result and the
1280 * respective fuse_mount in *fm (unless fm is NULL).
1281 *
1282 * The caller must hold fc->killsb.
1283 */
1284struct inode *fuse_ilookup(struct fuse_conn *fc, u64 nodeid,
1285			   struct fuse_mount **fm);
1286
1287/**
1288 * File-system tells the kernel to invalidate cache for the given node id.
1289 */
1290int fuse_reverse_inval_inode(struct fuse_conn *fc, u64 nodeid,
1291			     loff_t offset, loff_t len);
1292
1293/**
1294 * File-system tells the kernel to invalidate parent attributes and
1295 * the dentry matching parent/name.
1296 *
1297 * If the child_nodeid is non-zero and:
1298 *    - matches the inode number for the dentry matching parent/name,
1299 *    - is not a mount point
 *    - is a file or an empty directory
1301 * then the dentry is unhashed (d_delete()).
1302 */
1303int fuse_reverse_inval_entry(struct fuse_conn *fc, u64 parent_nodeid,
1304			     u64 child_nodeid, struct qstr *name, u32 flags);
1305
1306int fuse_do_open(struct fuse_mount *fm, u64 nodeid, struct file *file,
1307		 bool isdir);
1308
1309/**
1310 * fuse_direct_io() flags
1311 */
1312
1313/** If set, it is WRITE; otherwise - READ */
1314#define FUSE_DIO_WRITE (1 << 0)
1315
/**
 * CUSE passes fuse_direct_io() a file whose f_mapping->host is not from FUSE
 */
1317#define FUSE_DIO_CUSE  (1 << 1)
1318
1319ssize_t fuse_direct_io(struct fuse_io_priv *io, struct iov_iter *iter,
1320		       loff_t *ppos, int flags);
1321long fuse_do_ioctl(struct file *file, unsigned int cmd, unsigned long arg,
1322		   unsigned int flags);
1323long fuse_ioctl_common(struct file *file, unsigned int cmd,
1324		       unsigned long arg, unsigned int flags);
1325__poll_t fuse_file_poll(struct file *file, poll_table *wait);
1326int fuse_dev_release(struct inode *inode, struct file *file);
1327
1328bool fuse_write_update_attr(struct inode *inode, loff_t pos, ssize_t written);
1329
1330int fuse_flush_times(struct inode *inode, struct fuse_file *ff);
1331int fuse_write_inode(struct inode *inode, struct writeback_control *wbc);
1332
1333int fuse_do_setattr(struct dentry *dentry, struct iattr *attr,
1334		    struct file *file);
1335
1336void fuse_set_initialized(struct fuse_conn *fc);
1337
1338void fuse_unlock_inode(struct inode *inode, bool locked);
1339bool fuse_lock_inode(struct inode *inode);
1340
1341int fuse_setxattr(struct inode *inode, const char *name, const void *value,
1342		  size_t size, int flags, unsigned int extra_flags);
1343ssize_t fuse_getxattr(struct inode *inode, const char *name, void *value,
1344		      size_t size);
1345ssize_t fuse_listxattr(struct dentry *entry, char *list, size_t size);
1346int fuse_removexattr(struct inode *inode, const char *name);
1347extern const struct xattr_handler * const fuse_xattr_handlers[];
1348
1349struct posix_acl;
1350struct posix_acl *fuse_get_inode_acl(struct inode *inode, int type, bool rcu);
1351struct posix_acl *fuse_get_acl(struct mnt_idmap *idmap,
1352			       struct dentry *dentry, int type);
1353int fuse_set_acl(struct mnt_idmap *, struct dentry *dentry,
1354		 struct posix_acl *acl, int type);
1355
1356/* readdir.c */
1357int fuse_readdir(struct file *file, struct dir_context *ctx);
1358
1359/**
1360 * Return the number of bytes in an arguments list
1361 */
1362unsigned int fuse_len_args(unsigned int numargs, struct fuse_arg *args);
1363
1364/**
1365 * Get the next unique ID for a request
1366 */
1367u64 fuse_get_unique(struct fuse_iqueue *fiq);
1368void fuse_free_conn(struct fuse_conn *fc);
1369
1370/* dax.c */
1371
1372#define FUSE_IS_DAX(inode) (IS_ENABLED(CONFIG_FUSE_DAX) && IS_DAX(inode))
1373
1374ssize_t fuse_dax_read_iter(struct kiocb *iocb, struct iov_iter *to);
1375ssize_t fuse_dax_write_iter(struct kiocb *iocb, struct iov_iter *from);
1376int fuse_dax_mmap(struct file *file, struct vm_area_struct *vma);
1377int fuse_dax_break_layouts(struct inode *inode, u64 dmap_start, u64 dmap_end);
1378int fuse_dax_conn_alloc(struct fuse_conn *fc, enum fuse_dax_mode mode,
1379			struct dax_device *dax_dev);
1380void fuse_dax_conn_free(struct fuse_conn *fc);
1381bool fuse_dax_inode_alloc(struct super_block *sb, struct fuse_inode *fi);
1382void fuse_dax_inode_init(struct inode *inode, unsigned int flags);
1383void fuse_dax_inode_cleanup(struct inode *inode);
1384void fuse_dax_dontcache(struct inode *inode, unsigned int flags);
1385bool fuse_dax_check_alignment(struct fuse_conn *fc, unsigned int map_alignment);
1386void fuse_dax_cancel_work(struct fuse_conn *fc);
1387
1388/* ioctl.c */
1389long fuse_file_ioctl(struct file *file, unsigned int cmd, unsigned long arg);
1390long fuse_file_compat_ioctl(struct file *file, unsigned int cmd,
1391			    unsigned long arg);
1392int fuse_fileattr_get(struct dentry *dentry, struct fileattr *fa);
1393int fuse_fileattr_set(struct mnt_idmap *idmap,
1394		      struct dentry *dentry, struct fileattr *fa);
1395
1396/* iomode.c */
1397int fuse_file_cached_io_open(struct inode *inode, struct fuse_file *ff);
1398int fuse_inode_uncached_io_start(struct fuse_inode *fi,
1399				 struct fuse_backing *fb);
1400void fuse_inode_uncached_io_end(struct fuse_inode *fi);
1401
1402int fuse_file_io_open(struct file *file, struct inode *inode);
1403void fuse_file_io_release(struct fuse_file *ff, struct inode *inode);
1404
1405/* file.c */
1406struct fuse_file *fuse_file_open(struct fuse_mount *fm, u64 nodeid,
1407				 unsigned int open_flags, bool isdir);
1408void fuse_file_release(struct inode *inode, struct fuse_file *ff,
1409		       unsigned int open_flags, fl_owner_t id, bool isdir);
1410
1411/* passthrough.c */
/*
 * Fetch the fuse_backing pointer stored in @fi.
 *
 * When CONFIG_FUSE_PASSTHROUGH is set, fi->fb is sampled with READ_ONCE().
 * When it is not set, @fi is not touched and the result is always NULL.
 */
static inline struct fuse_backing *fuse_inode_backing(struct fuse_inode *fi)
{
	struct fuse_backing *backing = NULL;

#ifdef CONFIG_FUSE_PASSTHROUGH
	backing = READ_ONCE(fi->fb);
#endif
	return backing;
}
1420
/*
 * Store @fb in @fi and return the pointer that was there before.
 *
 * When CONFIG_FUSE_PASSTHROUGH is set, the exchange is done with a single
 * xchg() on fi->fb.  When it is not set, nothing is stored and NULL is
 * returned.
 */
static inline struct fuse_backing *fuse_inode_backing_set(struct fuse_inode *fi,
							  struct fuse_backing *fb)
{
	struct fuse_backing *previous = NULL;

#ifdef CONFIG_FUSE_PASSTHROUGH
	previous = xchg(&fi->fb, fb);
#endif
	return previous;
}
1430
/*
 * fuse_backing_get() / fuse_backing_put()
 *
 * Without CONFIG_FUSE_PASSTHROUGH these are inline stubs:
 * fuse_backing_get() returns NULL and fuse_backing_put() does nothing.
 * With CONFIG_FUSE_PASSTHROUGH only the prototypes are declared here and
 * the functions are defined out of line.
 */
#ifndef CONFIG_FUSE_PASSTHROUGH

static inline struct fuse_backing *fuse_backing_get(struct fuse_backing *fb)
{
	return NULL;
}

static inline void fuse_backing_put(struct fuse_backing *fb)
{
}

#else
struct fuse_backing *fuse_backing_get(struct fuse_backing *fb);
void fuse_backing_put(struct fuse_backing *fb);
#endif
1445
1446void fuse_backing_files_init(struct fuse_conn *fc);
1447void fuse_backing_files_free(struct fuse_conn *fc);
1448int fuse_backing_open(struct fuse_conn *fc, struct fuse_backing_map *map);
1449int fuse_backing_close(struct fuse_conn *fc, int backing_id);
1450
1451struct fuse_backing *fuse_passthrough_open(struct file *file,
1452					   struct inode *inode,
1453					   int backing_id);
1454void fuse_passthrough_release(struct fuse_file *ff, struct fuse_backing *fb);
1455
/*
 * Return the ->passthrough file pointer of @ff.
 *
 * When CONFIG_FUSE_PASSTHROUGH is not set, @ff is not touched and the
 * result is always NULL.
 */
static inline struct file *fuse_file_passthrough(struct fuse_file *ff)
{
	struct file *backing_file = NULL;

#ifdef CONFIG_FUSE_PASSTHROUGH
	backing_file = ff->passthrough;
#endif
	return backing_file;
}
1464
1465ssize_t fuse_passthrough_read_iter(struct kiocb *iocb, struct iov_iter *iter);
1466ssize_t fuse_passthrough_write_iter(struct kiocb *iocb, struct iov_iter *iter);
1467ssize_t fuse_passthrough_splice_read(struct file *in, loff_t *ppos,
1468				     struct pipe_inode_info *pipe,
1469				     size_t len, unsigned int flags);
1470ssize_t fuse_passthrough_splice_write(struct pipe_inode_info *pipe,
1471				      struct file *out, loff_t *ppos,
1472				      size_t len, unsigned int flags);
1473ssize_t fuse_passthrough_mmap(struct file *file, struct vm_area_struct *vma);
1474
1475#endif /* _FS_FUSE_I_H */
1476