1/*
2  FUSE: Filesystem in Userspace
3  Copyright (C) 2001-2008  Miklos Szeredi <miklos@szeredi.hu>
4
5  This program can be distributed under the terms of the GNU GPL.
6  See the file COPYING.
7*/
8
9#include "fuse_i.h"
10
11#include <linux/pagemap.h>
12#include <linux/file.h>
13#include <linux/fs_context.h>
14#include <linux/moduleparam.h>
15#include <linux/sched.h>
16#include <linux/namei.h>
17#include <linux/slab.h>
18#include <linux/xattr.h>
19#include <linux/iversion.h>
20#include <linux/posix_acl.h>
21#include <linux/security.h>
22#include <linux/types.h>
23#include <linux/kernel.h>
24
25static bool __read_mostly allow_sys_admin_access;
26module_param(allow_sys_admin_access, bool, 0644);
27MODULE_PARM_DESC(allow_sys_admin_access,
28		 "Allow users with CAP_SYS_ADMIN in initial userns to bypass allow_other access check");
29
30static void fuse_advise_use_readdirplus(struct inode *dir)
31{
32	struct fuse_inode *fi = get_fuse_inode(dir);
33
34	set_bit(FUSE_I_ADVISE_RDPLUS, &fi->state);
35}
36
37#if BITS_PER_LONG >= 64
38static inline void __fuse_dentry_settime(struct dentry *entry, u64 time)
39{
40	entry->d_fsdata = (void *) time;
41}
42
43static inline u64 fuse_dentry_time(const struct dentry *entry)
44{
45	return (u64)entry->d_fsdata;
46}
47
48#else
49union fuse_dentry {
50	u64 time;
51	struct rcu_head rcu;
52};
53
54static inline void __fuse_dentry_settime(struct dentry *dentry, u64 time)
55{
56	((union fuse_dentry *) dentry->d_fsdata)->time = time;
57}
58
59static inline u64 fuse_dentry_time(const struct dentry *entry)
60{
61	return ((union fuse_dentry *) entry->d_fsdata)->time;
62}
63#endif
64
65static void fuse_dentry_settime(struct dentry *dentry, u64 time)
66{
67	struct fuse_conn *fc = get_fuse_conn_super(dentry->d_sb);
68	bool delete = !time && fc->delete_stale;
69	/*
70	 * Mess with DCACHE_OP_DELETE because dput() will be faster without it.
71	 * Don't care about races, either way it's just an optimization
72	 */
73	if ((!delete && (dentry->d_flags & DCACHE_OP_DELETE)) ||
74	    (delete && !(dentry->d_flags & DCACHE_OP_DELETE))) {
75		spin_lock(&dentry->d_lock);
76		if (!delete)
77			dentry->d_flags &= ~DCACHE_OP_DELETE;
78		else
79			dentry->d_flags |= DCACHE_OP_DELETE;
80		spin_unlock(&dentry->d_lock);
81	}
82
83	__fuse_dentry_settime(dentry, time);
84}
85
86/*
87 * FUSE caches dentries and attributes with separate timeout.  The
88 * time in jiffies until the dentry/attributes are valid is stored in
89 * dentry->d_fsdata and fuse_inode->i_time respectively.
90 */
91
92/*
93 * Calculate the time in jiffies until a dentry/attributes are valid
94 */
95u64 fuse_time_to_jiffies(u64 sec, u32 nsec)
96{
97	if (sec || nsec) {
98		struct timespec64 ts = {
99			sec,
100			min_t(u32, nsec, NSEC_PER_SEC - 1)
101		};
102
103		return get_jiffies_64() + timespec64_to_jiffies(&ts);
104	} else
105		return 0;
106}
107
108/*
109 * Set dentry and possibly attribute timeouts from the lookup/mk*
110 * replies
111 */
112void fuse_change_entry_timeout(struct dentry *entry, struct fuse_entry_out *o)
113{
114	fuse_dentry_settime(entry,
115		fuse_time_to_jiffies(o->entry_valid, o->entry_valid_nsec));
116}
117
118void fuse_invalidate_attr_mask(struct inode *inode, u32 mask)
119{
120	set_mask_bits(&get_fuse_inode(inode)->inval_mask, 0, mask);
121}
122
123/*
124 * Mark the attributes as stale, so that at the next call to
125 * ->getattr() they will be fetched from userspace
126 */
127void fuse_invalidate_attr(struct inode *inode)
128{
129	fuse_invalidate_attr_mask(inode, STATX_BASIC_STATS);
130}
131
132static void fuse_dir_changed(struct inode *dir)
133{
134	fuse_invalidate_attr(dir);
135	inode_maybe_inc_iversion(dir, false);
136}
137
138/*
139 * Mark the attributes as stale due to an atime change.  Avoid the invalidate if
140 * atime is not used.
141 */
142void fuse_invalidate_atime(struct inode *inode)
143{
144	if (!IS_RDONLY(inode))
145		fuse_invalidate_attr_mask(inode, STATX_ATIME);
146}
147
148/*
149 * Just mark the entry as stale, so that a next attempt to look it up
150 * will result in a new lookup call to userspace
151 *
152 * This is called when a dentry is about to become negative and the
153 * timeout is unknown (unlink, rmdir, rename and in some cases
154 * lookup)
155 */
156void fuse_invalidate_entry_cache(struct dentry *entry)
157{
158	fuse_dentry_settime(entry, 0);
159}
160
/*
 * Like fuse_invalidate_entry_cache(), but d_invalidate() is called on the
 * dentry first in an attempt to drop it from the hash as well.
 */
static void fuse_invalidate_entry(struct dentry *entry)
{
	/* Try to unhash, then zero the timeout */
	d_invalidate(entry);
	fuse_invalidate_entry_cache(entry);
}
170
171static void fuse_lookup_init(struct fuse_conn *fc, struct fuse_args *args,
172			     u64 nodeid, const struct qstr *name,
173			     struct fuse_entry_out *outarg)
174{
175	memset(outarg, 0, sizeof(struct fuse_entry_out));
176	args->opcode = FUSE_LOOKUP;
177	args->nodeid = nodeid;
178	args->in_numargs = 1;
179	args->in_args[0].size = name->len + 1;
180	args->in_args[0].value = name->name;
181	args->out_numargs = 1;
182	args->out_args[0].size = sizeof(struct fuse_entry_out);
183	args->out_args[0].value = outarg;
184}
185
186/*
187 * Check whether the dentry is still valid
188 *
189 * If the entry validity timeout has expired and the dentry is
190 * positive, try to redo the lookup.  If the lookup results in a
191 * different inode, then let the VFS invalidate the dentry and redo
192 * the lookup once more.  If the lookup results in the same inode,
193 * then refresh the attributes, timeouts and mark the dentry valid.
194 */
195static int fuse_dentry_revalidate(struct dentry *entry, unsigned int flags)
196{
197	struct inode *inode;
198	struct dentry *parent;
199	struct fuse_mount *fm;
200	struct fuse_inode *fi;
201	int ret;
202
203	inode = d_inode_rcu(entry);
204	if (inode && fuse_is_bad(inode))
205		goto invalid;
206	else if (time_before64(fuse_dentry_time(entry), get_jiffies_64()) ||
207		 (flags & (LOOKUP_EXCL | LOOKUP_REVAL | LOOKUP_RENAME_TARGET))) {
208		struct fuse_entry_out outarg;
209		FUSE_ARGS(args);
210		struct fuse_forget_link *forget;
211		u64 attr_version;
212
213		/* For negative dentries, always do a fresh lookup */
214		if (!inode)
215			goto invalid;
216
217		ret = -ECHILD;
218		if (flags & LOOKUP_RCU)
219			goto out;
220
221		fm = get_fuse_mount(inode);
222
223		forget = fuse_alloc_forget();
224		ret = -ENOMEM;
225		if (!forget)
226			goto out;
227
228		attr_version = fuse_get_attr_version(fm->fc);
229
230		parent = dget_parent(entry);
231		fuse_lookup_init(fm->fc, &args, get_node_id(d_inode(parent)),
232				 &entry->d_name, &outarg);
233		ret = fuse_simple_request(fm, &args);
234		dput(parent);
235		/* Zero nodeid is same as -ENOENT */
236		if (!ret && !outarg.nodeid)
237			ret = -ENOENT;
238		if (!ret) {
239			fi = get_fuse_inode(inode);
240			if (outarg.nodeid != get_node_id(inode) ||
241			    (bool) IS_AUTOMOUNT(inode) != (bool) (outarg.attr.flags & FUSE_ATTR_SUBMOUNT)) {
242				fuse_queue_forget(fm->fc, forget,
243						  outarg.nodeid, 1);
244				goto invalid;
245			}
246			spin_lock(&fi->lock);
247			fi->nlookup++;
248			spin_unlock(&fi->lock);
249		}
250		kfree(forget);
251		if (ret == -ENOMEM || ret == -EINTR)
252			goto out;
253		if (ret || fuse_invalid_attr(&outarg.attr) ||
254		    fuse_stale_inode(inode, outarg.generation, &outarg.attr))
255			goto invalid;
256
257		forget_all_cached_acls(inode);
258		fuse_change_attributes(inode, &outarg.attr, NULL,
259				       ATTR_TIMEOUT(&outarg),
260				       attr_version);
261		fuse_change_entry_timeout(entry, &outarg);
262	} else if (inode) {
263		fi = get_fuse_inode(inode);
264		if (flags & LOOKUP_RCU) {
265			if (test_bit(FUSE_I_INIT_RDPLUS, &fi->state))
266				return -ECHILD;
267		} else if (test_and_clear_bit(FUSE_I_INIT_RDPLUS, &fi->state)) {
268			parent = dget_parent(entry);
269			fuse_advise_use_readdirplus(d_inode(parent));
270			dput(parent);
271		}
272	}
273	ret = 1;
274out:
275	return ret;
276
277invalid:
278	ret = 0;
279	goto out;
280}
281
282#if BITS_PER_LONG < 64
283static int fuse_dentry_init(struct dentry *dentry)
284{
285	dentry->d_fsdata = kzalloc(sizeof(union fuse_dentry),
286				   GFP_KERNEL_ACCOUNT | __GFP_RECLAIMABLE);
287
288	return dentry->d_fsdata ? 0 : -ENOMEM;
289}
290static void fuse_dentry_release(struct dentry *dentry)
291{
292	union fuse_dentry *fd = dentry->d_fsdata;
293
294	kfree_rcu(fd, rcu);
295}
296#endif
297
298static int fuse_dentry_delete(const struct dentry *dentry)
299{
300	return time_before64(fuse_dentry_time(dentry), get_jiffies_64());
301}
302
303/*
304 * Create a fuse_mount object with a new superblock (with path->dentry
305 * as the root), and return that mount so it can be auto-mounted on
306 * @path.
307 */
308static struct vfsmount *fuse_dentry_automount(struct path *path)
309{
310	struct fs_context *fsc;
311	struct vfsmount *mnt;
312	struct fuse_inode *mp_fi = get_fuse_inode(d_inode(path->dentry));
313
314	fsc = fs_context_for_submount(path->mnt->mnt_sb->s_type, path->dentry);
315	if (IS_ERR(fsc))
316		return ERR_CAST(fsc);
317
318	/* Pass the FUSE inode of the mount for fuse_get_tree_submount() */
319	fsc->fs_private = mp_fi;
320
321	/* Create the submount */
322	mnt = fc_mount(fsc);
323	if (!IS_ERR(mnt))
324		mntget(mnt);
325
326	put_fs_context(fsc);
327	return mnt;
328}
329
330const struct dentry_operations fuse_dentry_operations = {
331	.d_revalidate	= fuse_dentry_revalidate,
332	.d_delete	= fuse_dentry_delete,
333#if BITS_PER_LONG < 64
334	.d_init		= fuse_dentry_init,
335	.d_release	= fuse_dentry_release,
336#endif
337	.d_automount	= fuse_dentry_automount,
338};
339
340const struct dentry_operations fuse_root_dentry_operations = {
341#if BITS_PER_LONG < 64
342	.d_init		= fuse_dentry_init,
343	.d_release	= fuse_dentry_release,
344#endif
345};
346
/*
 * Returns 1 if the file type bits of mode @m denote a regular file,
 * directory, symlink, character device, block device, FIFO or socket;
 * 0 otherwise.
 */
int fuse_valid_type(int m)
{
	if (S_ISREG(m) || S_ISDIR(m) || S_ISLNK(m))
		return 1;
	if (S_ISCHR(m) || S_ISBLK(m))
		return 1;

	return S_ISFIFO(m) || S_ISSOCK(m);
}
352
353static bool fuse_valid_size(u64 size)
354{
355	return size <= LLONG_MAX;
356}
357
358bool fuse_invalid_attr(struct fuse_attr *attr)
359{
360	return !fuse_valid_type(attr->mode) || !fuse_valid_size(attr->size);
361}
362
363int fuse_lookup_name(struct super_block *sb, u64 nodeid, const struct qstr *name,
364		     struct fuse_entry_out *outarg, struct inode **inode)
365{
366	struct fuse_mount *fm = get_fuse_mount_super(sb);
367	FUSE_ARGS(args);
368	struct fuse_forget_link *forget;
369	u64 attr_version;
370	int err;
371
372	*inode = NULL;
373	err = -ENAMETOOLONG;
374	if (name->len > FUSE_NAME_MAX)
375		goto out;
376
377
378	forget = fuse_alloc_forget();
379	err = -ENOMEM;
380	if (!forget)
381		goto out;
382
383	attr_version = fuse_get_attr_version(fm->fc);
384
385	fuse_lookup_init(fm->fc, &args, nodeid, name, outarg);
386	err = fuse_simple_request(fm, &args);
387	/* Zero nodeid is same as -ENOENT, but with valid timeout */
388	if (err || !outarg->nodeid)
389		goto out_put_forget;
390
391	err = -EIO;
392	if (fuse_invalid_attr(&outarg->attr))
393		goto out_put_forget;
394	if (outarg->nodeid == FUSE_ROOT_ID && outarg->generation != 0) {
395		pr_warn_once("root generation should be zero\n");
396		outarg->generation = 0;
397	}
398
399	*inode = fuse_iget(sb, outarg->nodeid, outarg->generation,
400			   &outarg->attr, ATTR_TIMEOUT(outarg),
401			   attr_version);
402	err = -ENOMEM;
403	if (!*inode) {
404		fuse_queue_forget(fm->fc, forget, outarg->nodeid, 1);
405		goto out;
406	}
407	err = 0;
408
409 out_put_forget:
410	kfree(forget);
411 out:
412	return err;
413}
414
415static struct dentry *fuse_lookup(struct inode *dir, struct dentry *entry,
416				  unsigned int flags)
417{
418	int err;
419	struct fuse_entry_out outarg;
420	struct inode *inode;
421	struct dentry *newent;
422	bool outarg_valid = true;
423	bool locked;
424
425	if (fuse_is_bad(dir))
426		return ERR_PTR(-EIO);
427
428	locked = fuse_lock_inode(dir);
429	err = fuse_lookup_name(dir->i_sb, get_node_id(dir), &entry->d_name,
430			       &outarg, &inode);
431	fuse_unlock_inode(dir, locked);
432	if (err == -ENOENT) {
433		outarg_valid = false;
434		err = 0;
435	}
436	if (err)
437		goto out_err;
438
439	err = -EIO;
440	if (inode && get_node_id(inode) == FUSE_ROOT_ID)
441		goto out_iput;
442
443	newent = d_splice_alias(inode, entry);
444	err = PTR_ERR(newent);
445	if (IS_ERR(newent))
446		goto out_err;
447
448	entry = newent ? newent : entry;
449	if (outarg_valid)
450		fuse_change_entry_timeout(entry, &outarg);
451	else
452		fuse_invalidate_entry_cache(entry);
453
454	if (inode)
455		fuse_advise_use_readdirplus(dir);
456	return newent;
457
458 out_iput:
459	iput(inode);
460 out_err:
461	return ERR_PTR(err);
462}
463
464static int get_security_context(struct dentry *entry, umode_t mode,
465				struct fuse_in_arg *ext)
466{
467	struct fuse_secctx *fctx;
468	struct fuse_secctx_header *header;
469	void *ctx = NULL, *ptr;
470	u32 ctxlen, total_len = sizeof(*header);
471	int err, nr_ctx = 0;
472	const char *name;
473	size_t namelen;
474
475	err = security_dentry_init_security(entry, mode, &entry->d_name,
476					    &name, &ctx, &ctxlen);
477	if (err) {
478		if (err != -EOPNOTSUPP)
479			goto out_err;
480		/* No LSM is supporting this security hook. Ignore error */
481		ctxlen = 0;
482		ctx = NULL;
483	}
484
485	if (ctxlen) {
486		nr_ctx = 1;
487		namelen = strlen(name) + 1;
488		err = -EIO;
489		if (WARN_ON(namelen > XATTR_NAME_MAX + 1 || ctxlen > S32_MAX))
490			goto out_err;
491		total_len += FUSE_REC_ALIGN(sizeof(*fctx) + namelen + ctxlen);
492	}
493
494	err = -ENOMEM;
495	header = ptr = kzalloc(total_len, GFP_KERNEL);
496	if (!ptr)
497		goto out_err;
498
499	header->nr_secctx = nr_ctx;
500	header->size = total_len;
501	ptr += sizeof(*header);
502	if (nr_ctx) {
503		fctx = ptr;
504		fctx->size = ctxlen;
505		ptr += sizeof(*fctx);
506
507		strcpy(ptr, name);
508		ptr += namelen;
509
510		memcpy(ptr, ctx, ctxlen);
511	}
512	ext->size = total_len;
513	ext->value = header;
514	err = 0;
515out_err:
516	kfree(ctx);
517	return err;
518}
519
520static void *extend_arg(struct fuse_in_arg *buf, u32 bytes)
521{
522	void *p;
523	u32 newlen = buf->size + bytes;
524
525	p = krealloc(buf->value, newlen, GFP_KERNEL);
526	if (!p) {
527		kfree(buf->value);
528		buf->size = 0;
529		buf->value = NULL;
530		return NULL;
531	}
532
533	memset(p + buf->size, 0, bytes);
534	buf->value = p;
535	buf->size = newlen;
536
537	return p + newlen - bytes;
538}
539
540static u32 fuse_ext_size(size_t size)
541{
542	return FUSE_REC_ALIGN(sizeof(struct fuse_ext_header) + size);
543}
544
545/*
546 * This adds just a single supplementary group that matches the parent's group.
547 */
548static int get_create_supp_group(struct inode *dir, struct fuse_in_arg *ext)
549{
550	struct fuse_conn *fc = get_fuse_conn(dir);
551	struct fuse_ext_header *xh;
552	struct fuse_supp_groups *sg;
553	kgid_t kgid = dir->i_gid;
554	gid_t parent_gid = from_kgid(fc->user_ns, kgid);
555	u32 sg_len = fuse_ext_size(sizeof(*sg) + sizeof(sg->groups[0]));
556
557	if (parent_gid == (gid_t) -1 || gid_eq(kgid, current_fsgid()) ||
558	    !in_group_p(kgid))
559		return 0;
560
561	xh = extend_arg(ext, sg_len);
562	if (!xh)
563		return -ENOMEM;
564
565	xh->size = sg_len;
566	xh->type = FUSE_EXT_GROUPS;
567
568	sg = (struct fuse_supp_groups *) &xh[1];
569	sg->nr_groups = 1;
570	sg->groups[0] = parent_gid;
571
572	return 0;
573}
574
575static int get_create_ext(struct fuse_args *args,
576			  struct inode *dir, struct dentry *dentry,
577			  umode_t mode)
578{
579	struct fuse_conn *fc = get_fuse_conn_super(dentry->d_sb);
580	struct fuse_in_arg ext = { .size = 0, .value = NULL };
581	int err = 0;
582
583	if (fc->init_security)
584		err = get_security_context(dentry, mode, &ext);
585	if (!err && fc->create_supp_group)
586		err = get_create_supp_group(dir, &ext);
587
588	if (!err && ext.size) {
589		WARN_ON(args->in_numargs >= ARRAY_SIZE(args->in_args));
590		args->is_ext = true;
591		args->ext_idx = args->in_numargs++;
592		args->in_args[args->ext_idx] = ext;
593	} else {
594		kfree(ext.value);
595	}
596
597	return err;
598}
599
600static void free_ext_value(struct fuse_args *args)
601{
602	if (args->is_ext)
603		kfree(args->in_args[args->ext_idx].value);
604}
605
606/*
607 * Atomic create+open operation
608 *
609 * If the filesystem doesn't support this, then fall back to separate
610 * 'mknod' + 'open' requests.
611 */
612static int fuse_create_open(struct inode *dir, struct dentry *entry,
613			    struct file *file, unsigned int flags,
614			    umode_t mode, u32 opcode)
615{
616	int err;
617	struct inode *inode;
618	struct fuse_mount *fm = get_fuse_mount(dir);
619	FUSE_ARGS(args);
620	struct fuse_forget_link *forget;
621	struct fuse_create_in inarg;
622	struct fuse_open_out *outopenp;
623	struct fuse_entry_out outentry;
624	struct fuse_inode *fi;
625	struct fuse_file *ff;
626	bool trunc = flags & O_TRUNC;
627
628	/* Userspace expects S_IFREG in create mode */
629	BUG_ON((mode & S_IFMT) != S_IFREG);
630
631	forget = fuse_alloc_forget();
632	err = -ENOMEM;
633	if (!forget)
634		goto out_err;
635
636	err = -ENOMEM;
637	ff = fuse_file_alloc(fm, true);
638	if (!ff)
639		goto out_put_forget_req;
640
641	if (!fm->fc->dont_mask)
642		mode &= ~current_umask();
643
644	flags &= ~O_NOCTTY;
645	memset(&inarg, 0, sizeof(inarg));
646	memset(&outentry, 0, sizeof(outentry));
647	inarg.flags = flags;
648	inarg.mode = mode;
649	inarg.umask = current_umask();
650
651	if (fm->fc->handle_killpriv_v2 && trunc &&
652	    !(flags & O_EXCL) && !capable(CAP_FSETID)) {
653		inarg.open_flags |= FUSE_OPEN_KILL_SUIDGID;
654	}
655
656	args.opcode = opcode;
657	args.nodeid = get_node_id(dir);
658	args.in_numargs = 2;
659	args.in_args[0].size = sizeof(inarg);
660	args.in_args[0].value = &inarg;
661	args.in_args[1].size = entry->d_name.len + 1;
662	args.in_args[1].value = entry->d_name.name;
663	args.out_numargs = 2;
664	args.out_args[0].size = sizeof(outentry);
665	args.out_args[0].value = &outentry;
666	/* Store outarg for fuse_finish_open() */
667	outopenp = &ff->args->open_outarg;
668	args.out_args[1].size = sizeof(*outopenp);
669	args.out_args[1].value = outopenp;
670
671	err = get_create_ext(&args, dir, entry, mode);
672	if (err)
673		goto out_put_forget_req;
674
675	err = fuse_simple_request(fm, &args);
676	free_ext_value(&args);
677	if (err)
678		goto out_free_ff;
679
680	err = -EIO;
681	if (!S_ISREG(outentry.attr.mode) || invalid_nodeid(outentry.nodeid) ||
682	    fuse_invalid_attr(&outentry.attr))
683		goto out_free_ff;
684
685	ff->fh = outopenp->fh;
686	ff->nodeid = outentry.nodeid;
687	ff->open_flags = outopenp->open_flags;
688	inode = fuse_iget(dir->i_sb, outentry.nodeid, outentry.generation,
689			  &outentry.attr, ATTR_TIMEOUT(&outentry), 0);
690	if (!inode) {
691		flags &= ~(O_CREAT | O_EXCL | O_TRUNC);
692		fuse_sync_release(NULL, ff, flags);
693		fuse_queue_forget(fm->fc, forget, outentry.nodeid, 1);
694		err = -ENOMEM;
695		goto out_err;
696	}
697	kfree(forget);
698	d_instantiate(entry, inode);
699	fuse_change_entry_timeout(entry, &outentry);
700	fuse_dir_changed(dir);
701	err = generic_file_open(inode, file);
702	if (!err) {
703		file->private_data = ff;
704		err = finish_open(file, entry, fuse_finish_open);
705	}
706	if (err) {
707		fi = get_fuse_inode(inode);
708		fuse_sync_release(fi, ff, flags);
709	} else {
710		if (fm->fc->atomic_o_trunc && trunc)
711			truncate_pagecache(inode, 0);
712		else if (!(ff->open_flags & FOPEN_KEEP_CACHE))
713			invalidate_inode_pages2(inode->i_mapping);
714	}
715	return err;
716
717out_free_ff:
718	fuse_file_free(ff);
719out_put_forget_req:
720	kfree(forget);
721out_err:
722	return err;
723}
724
725static int fuse_mknod(struct mnt_idmap *, struct inode *, struct dentry *,
726		      umode_t, dev_t);
727static int fuse_atomic_open(struct inode *dir, struct dentry *entry,
728			    struct file *file, unsigned flags,
729			    umode_t mode)
730{
731	int err;
732	struct fuse_conn *fc = get_fuse_conn(dir);
733	struct dentry *res = NULL;
734
735	if (fuse_is_bad(dir))
736		return -EIO;
737
738	if (d_in_lookup(entry)) {
739		res = fuse_lookup(dir, entry, 0);
740		if (IS_ERR(res))
741			return PTR_ERR(res);
742
743		if (res)
744			entry = res;
745	}
746
747	if (!(flags & O_CREAT) || d_really_is_positive(entry))
748		goto no_open;
749
750	/* Only creates */
751	file->f_mode |= FMODE_CREATED;
752
753	if (fc->no_create)
754		goto mknod;
755
756	err = fuse_create_open(dir, entry, file, flags, mode, FUSE_CREATE);
757	if (err == -ENOSYS) {
758		fc->no_create = 1;
759		goto mknod;
760	} else if (err == -EEXIST)
761		fuse_invalidate_entry(entry);
762out_dput:
763	dput(res);
764	return err;
765
766mknod:
767	err = fuse_mknod(&nop_mnt_idmap, dir, entry, mode, 0);
768	if (err)
769		goto out_dput;
770no_open:
771	return finish_no_open(file, res);
772}
773
774/*
775 * Code shared between mknod, mkdir, symlink and link
776 */
777static int create_new_entry(struct fuse_mount *fm, struct fuse_args *args,
778			    struct inode *dir, struct dentry *entry,
779			    umode_t mode)
780{
781	struct fuse_entry_out outarg;
782	struct inode *inode;
783	struct dentry *d;
784	int err;
785	struct fuse_forget_link *forget;
786
787	if (fuse_is_bad(dir))
788		return -EIO;
789
790	forget = fuse_alloc_forget();
791	if (!forget)
792		return -ENOMEM;
793
794	memset(&outarg, 0, sizeof(outarg));
795	args->nodeid = get_node_id(dir);
796	args->out_numargs = 1;
797	args->out_args[0].size = sizeof(outarg);
798	args->out_args[0].value = &outarg;
799
800	if (args->opcode != FUSE_LINK) {
801		err = get_create_ext(args, dir, entry, mode);
802		if (err)
803			goto out_put_forget_req;
804	}
805
806	err = fuse_simple_request(fm, args);
807	free_ext_value(args);
808	if (err)
809		goto out_put_forget_req;
810
811	err = -EIO;
812	if (invalid_nodeid(outarg.nodeid) || fuse_invalid_attr(&outarg.attr))
813		goto out_put_forget_req;
814
815	if ((outarg.attr.mode ^ mode) & S_IFMT)
816		goto out_put_forget_req;
817
818	inode = fuse_iget(dir->i_sb, outarg.nodeid, outarg.generation,
819			  &outarg.attr, ATTR_TIMEOUT(&outarg), 0);
820	if (!inode) {
821		fuse_queue_forget(fm->fc, forget, outarg.nodeid, 1);
822		return -ENOMEM;
823	}
824	kfree(forget);
825
826	d_drop(entry);
827	d = d_splice_alias(inode, entry);
828	if (IS_ERR(d))
829		return PTR_ERR(d);
830
831	if (d) {
832		fuse_change_entry_timeout(d, &outarg);
833		dput(d);
834	} else {
835		fuse_change_entry_timeout(entry, &outarg);
836	}
837	fuse_dir_changed(dir);
838	return 0;
839
840 out_put_forget_req:
841	if (err == -EEXIST)
842		fuse_invalidate_entry(entry);
843	kfree(forget);
844	return err;
845}
846
847static int fuse_mknod(struct mnt_idmap *idmap, struct inode *dir,
848		      struct dentry *entry, umode_t mode, dev_t rdev)
849{
850	struct fuse_mknod_in inarg;
851	struct fuse_mount *fm = get_fuse_mount(dir);
852	FUSE_ARGS(args);
853
854	if (!fm->fc->dont_mask)
855		mode &= ~current_umask();
856
857	memset(&inarg, 0, sizeof(inarg));
858	inarg.mode = mode;
859	inarg.rdev = new_encode_dev(rdev);
860	inarg.umask = current_umask();
861	args.opcode = FUSE_MKNOD;
862	args.in_numargs = 2;
863	args.in_args[0].size = sizeof(inarg);
864	args.in_args[0].value = &inarg;
865	args.in_args[1].size = entry->d_name.len + 1;
866	args.in_args[1].value = entry->d_name.name;
867	return create_new_entry(fm, &args, dir, entry, mode);
868}
869
870static int fuse_create(struct mnt_idmap *idmap, struct inode *dir,
871		       struct dentry *entry, umode_t mode, bool excl)
872{
873	return fuse_mknod(&nop_mnt_idmap, dir, entry, mode, 0);
874}
875
876static int fuse_tmpfile(struct mnt_idmap *idmap, struct inode *dir,
877			struct file *file, umode_t mode)
878{
879	struct fuse_conn *fc = get_fuse_conn(dir);
880	int err;
881
882	if (fc->no_tmpfile)
883		return -EOPNOTSUPP;
884
885	err = fuse_create_open(dir, file->f_path.dentry, file, file->f_flags, mode, FUSE_TMPFILE);
886	if (err == -ENOSYS) {
887		fc->no_tmpfile = 1;
888		err = -EOPNOTSUPP;
889	}
890	return err;
891}
892
893static int fuse_mkdir(struct mnt_idmap *idmap, struct inode *dir,
894		      struct dentry *entry, umode_t mode)
895{
896	struct fuse_mkdir_in inarg;
897	struct fuse_mount *fm = get_fuse_mount(dir);
898	FUSE_ARGS(args);
899
900	if (!fm->fc->dont_mask)
901		mode &= ~current_umask();
902
903	memset(&inarg, 0, sizeof(inarg));
904	inarg.mode = mode;
905	inarg.umask = current_umask();
906	args.opcode = FUSE_MKDIR;
907	args.in_numargs = 2;
908	args.in_args[0].size = sizeof(inarg);
909	args.in_args[0].value = &inarg;
910	args.in_args[1].size = entry->d_name.len + 1;
911	args.in_args[1].value = entry->d_name.name;
912	return create_new_entry(fm, &args, dir, entry, S_IFDIR);
913}
914
915static int fuse_symlink(struct mnt_idmap *idmap, struct inode *dir,
916			struct dentry *entry, const char *link)
917{
918	struct fuse_mount *fm = get_fuse_mount(dir);
919	unsigned len = strlen(link) + 1;
920	FUSE_ARGS(args);
921
922	args.opcode = FUSE_SYMLINK;
923	args.in_numargs = 2;
924	args.in_args[0].size = entry->d_name.len + 1;
925	args.in_args[0].value = entry->d_name.name;
926	args.in_args[1].size = len;
927	args.in_args[1].value = link;
928	return create_new_entry(fm, &args, dir, entry, S_IFLNK);
929}
930
931void fuse_flush_time_update(struct inode *inode)
932{
933	int err = sync_inode_metadata(inode, 1);
934
935	mapping_set_error(inode->i_mapping, err);
936}
937
/*
 * Set the cached ctime of @inode to the current time, mark the inode dirty
 * and flush the time update.  Nothing is done for inodes for which
 * IS_NOCMTIME() holds.
 */
static void fuse_update_ctime_in_cache(struct inode *inode)
{
	if (IS_NOCMTIME(inode))
		return;

	inode_set_ctime_current(inode);
	mark_inode_dirty_sync(inode);
	fuse_flush_time_update(inode);
}
946
947void fuse_update_ctime(struct inode *inode)
948{
949	fuse_invalidate_attr_mask(inode, STATX_CTIME);
950	fuse_update_ctime_in_cache(inode);
951}
952
953static void fuse_entry_unlinked(struct dentry *entry)
954{
955	struct inode *inode = d_inode(entry);
956	struct fuse_conn *fc = get_fuse_conn(inode);
957	struct fuse_inode *fi = get_fuse_inode(inode);
958
959	spin_lock(&fi->lock);
960	fi->attr_version = atomic64_inc_return(&fc->attr_version);
961	/*
962	 * If i_nlink == 0 then unlink doesn't make sense, yet this can
963	 * happen if userspace filesystem is careless.  It would be
964	 * difficult to enforce correct nlink usage so just ignore this
965	 * condition here
966	 */
967	if (S_ISDIR(inode->i_mode))
968		clear_nlink(inode);
969	else if (inode->i_nlink > 0)
970		drop_nlink(inode);
971	spin_unlock(&fi->lock);
972	fuse_invalidate_entry_cache(entry);
973	fuse_update_ctime(inode);
974}
975
976static int fuse_unlink(struct inode *dir, struct dentry *entry)
977{
978	int err;
979	struct fuse_mount *fm = get_fuse_mount(dir);
980	FUSE_ARGS(args);
981
982	if (fuse_is_bad(dir))
983		return -EIO;
984
985	args.opcode = FUSE_UNLINK;
986	args.nodeid = get_node_id(dir);
987	args.in_numargs = 1;
988	args.in_args[0].size = entry->d_name.len + 1;
989	args.in_args[0].value = entry->d_name.name;
990	err = fuse_simple_request(fm, &args);
991	if (!err) {
992		fuse_dir_changed(dir);
993		fuse_entry_unlinked(entry);
994	} else if (err == -EINTR || err == -ENOENT)
995		fuse_invalidate_entry(entry);
996	return err;
997}
998
999static int fuse_rmdir(struct inode *dir, struct dentry *entry)
1000{
1001	int err;
1002	struct fuse_mount *fm = get_fuse_mount(dir);
1003	FUSE_ARGS(args);
1004
1005	if (fuse_is_bad(dir))
1006		return -EIO;
1007
1008	args.opcode = FUSE_RMDIR;
1009	args.nodeid = get_node_id(dir);
1010	args.in_numargs = 1;
1011	args.in_args[0].size = entry->d_name.len + 1;
1012	args.in_args[0].value = entry->d_name.name;
1013	err = fuse_simple_request(fm, &args);
1014	if (!err) {
1015		fuse_dir_changed(dir);
1016		fuse_entry_unlinked(entry);
1017	} else if (err == -EINTR || err == -ENOENT)
1018		fuse_invalidate_entry(entry);
1019	return err;
1020}
1021
1022static int fuse_rename_common(struct inode *olddir, struct dentry *oldent,
1023			      struct inode *newdir, struct dentry *newent,
1024			      unsigned int flags, int opcode, size_t argsize)
1025{
1026	int err;
1027	struct fuse_rename2_in inarg;
1028	struct fuse_mount *fm = get_fuse_mount(olddir);
1029	FUSE_ARGS(args);
1030
1031	memset(&inarg, 0, argsize);
1032	inarg.newdir = get_node_id(newdir);
1033	inarg.flags = flags;
1034	args.opcode = opcode;
1035	args.nodeid = get_node_id(olddir);
1036	args.in_numargs = 3;
1037	args.in_args[0].size = argsize;
1038	args.in_args[0].value = &inarg;
1039	args.in_args[1].size = oldent->d_name.len + 1;
1040	args.in_args[1].value = oldent->d_name.name;
1041	args.in_args[2].size = newent->d_name.len + 1;
1042	args.in_args[2].value = newent->d_name.name;
1043	err = fuse_simple_request(fm, &args);
1044	if (!err) {
1045		/* ctime changes */
1046		fuse_update_ctime(d_inode(oldent));
1047
1048		if (flags & RENAME_EXCHANGE)
1049			fuse_update_ctime(d_inode(newent));
1050
1051		fuse_dir_changed(olddir);
1052		if (olddir != newdir)
1053			fuse_dir_changed(newdir);
1054
1055		/* newent will end up negative */
1056		if (!(flags & RENAME_EXCHANGE) && d_really_is_positive(newent))
1057			fuse_entry_unlinked(newent);
1058	} else if (err == -EINTR || err == -ENOENT) {
1059		/* If request was interrupted, DEITY only knows if the
1060		   rename actually took place.  If the invalidation
1061		   fails (e.g. some process has CWD under the renamed
1062		   directory), then there can be inconsistency between
1063		   the dcache and the real filesystem.  Tough luck. */
1064		fuse_invalidate_entry(oldent);
1065		if (d_really_is_positive(newent))
1066			fuse_invalidate_entry(newent);
1067	}
1068
1069	return err;
1070}
1071
1072static int fuse_rename2(struct mnt_idmap *idmap, struct inode *olddir,
1073			struct dentry *oldent, struct inode *newdir,
1074			struct dentry *newent, unsigned int flags)
1075{
1076	struct fuse_conn *fc = get_fuse_conn(olddir);
1077	int err;
1078
1079	if (fuse_is_bad(olddir))
1080		return -EIO;
1081
1082	if (flags & ~(RENAME_NOREPLACE | RENAME_EXCHANGE | RENAME_WHITEOUT))
1083		return -EINVAL;
1084
1085	if (flags) {
1086		if (fc->no_rename2 || fc->minor < 23)
1087			return -EINVAL;
1088
1089		err = fuse_rename_common(olddir, oldent, newdir, newent, flags,
1090					 FUSE_RENAME2,
1091					 sizeof(struct fuse_rename2_in));
1092		if (err == -ENOSYS) {
1093			fc->no_rename2 = 1;
1094			err = -EINVAL;
1095		}
1096	} else {
1097		err = fuse_rename_common(olddir, oldent, newdir, newent, 0,
1098					 FUSE_RENAME,
1099					 sizeof(struct fuse_rename_in));
1100	}
1101
1102	return err;
1103}
1104
1105static int fuse_link(struct dentry *entry, struct inode *newdir,
1106		     struct dentry *newent)
1107{
1108	int err;
1109	struct fuse_link_in inarg;
1110	struct inode *inode = d_inode(entry);
1111	struct fuse_mount *fm = get_fuse_mount(inode);
1112	FUSE_ARGS(args);
1113
1114	memset(&inarg, 0, sizeof(inarg));
1115	inarg.oldnodeid = get_node_id(inode);
1116	args.opcode = FUSE_LINK;
1117	args.in_numargs = 2;
1118	args.in_args[0].size = sizeof(inarg);
1119	args.in_args[0].value = &inarg;
1120	args.in_args[1].size = newent->d_name.len + 1;
1121	args.in_args[1].value = newent->d_name.name;
1122	err = create_new_entry(fm, &args, newdir, newent, inode->i_mode);
1123	if (!err)
1124		fuse_update_ctime_in_cache(inode);
1125	else if (err == -EINTR)
1126		fuse_invalidate_attr(inode);
1127
1128	return err;
1129}
1130
1131static void fuse_fillattr(struct inode *inode, struct fuse_attr *attr,
1132			  struct kstat *stat)
1133{
1134	unsigned int blkbits;
1135	struct fuse_conn *fc = get_fuse_conn(inode);
1136
1137	stat->dev = inode->i_sb->s_dev;
1138	stat->ino = attr->ino;
1139	stat->mode = (inode->i_mode & S_IFMT) | (attr->mode & 07777);
1140	stat->nlink = attr->nlink;
1141	stat->uid = make_kuid(fc->user_ns, attr->uid);
1142	stat->gid = make_kgid(fc->user_ns, attr->gid);
1143	stat->rdev = inode->i_rdev;
1144	stat->atime.tv_sec = attr->atime;
1145	stat->atime.tv_nsec = attr->atimensec;
1146	stat->mtime.tv_sec = attr->mtime;
1147	stat->mtime.tv_nsec = attr->mtimensec;
1148	stat->ctime.tv_sec = attr->ctime;
1149	stat->ctime.tv_nsec = attr->ctimensec;
1150	stat->size = attr->size;
1151	stat->blocks = attr->blocks;
1152
1153	if (attr->blksize != 0)
1154		blkbits = ilog2(attr->blksize);
1155	else
1156		blkbits = inode->i_sb->s_blocksize_bits;
1157
1158	stat->blksize = 1 << blkbits;
1159}
1160
1161static void fuse_statx_to_attr(struct fuse_statx *sx, struct fuse_attr *attr)
1162{
1163	memset(attr, 0, sizeof(*attr));
1164	attr->ino = sx->ino;
1165	attr->size = sx->size;
1166	attr->blocks = sx->blocks;
1167	attr->atime = sx->atime.tv_sec;
1168	attr->mtime = sx->mtime.tv_sec;
1169	attr->ctime = sx->ctime.tv_sec;
1170	attr->atimensec = sx->atime.tv_nsec;
1171	attr->mtimensec = sx->mtime.tv_nsec;
1172	attr->ctimensec = sx->ctime.tv_nsec;
1173	attr->mode = sx->mode;
1174	attr->nlink = sx->nlink;
1175	attr->uid = sx->uid;
1176	attr->gid = sx->gid;
1177	attr->rdev = new_encode_dev(MKDEV(sx->rdev_major, sx->rdev_minor));
1178	attr->blksize = sx->blksize;
1179}
1180
1181static int fuse_do_statx(struct inode *inode, struct file *file,
1182			 struct kstat *stat)
1183{
1184	int err;
1185	struct fuse_attr attr;
1186	struct fuse_statx *sx;
1187	struct fuse_statx_in inarg;
1188	struct fuse_statx_out outarg;
1189	struct fuse_mount *fm = get_fuse_mount(inode);
1190	u64 attr_version = fuse_get_attr_version(fm->fc);
1191	FUSE_ARGS(args);
1192
1193	memset(&inarg, 0, sizeof(inarg));
1194	memset(&outarg, 0, sizeof(outarg));
1195	/* Directories have separate file-handle space */
1196	if (file && S_ISREG(inode->i_mode)) {
1197		struct fuse_file *ff = file->private_data;
1198
1199		inarg.getattr_flags |= FUSE_GETATTR_FH;
1200		inarg.fh = ff->fh;
1201	}
1202	/* For now leave sync hints as the default, request all stats. */
1203	inarg.sx_flags = 0;
1204	inarg.sx_mask = STATX_BASIC_STATS | STATX_BTIME;
1205	args.opcode = FUSE_STATX;
1206	args.nodeid = get_node_id(inode);
1207	args.in_numargs = 1;
1208	args.in_args[0].size = sizeof(inarg);
1209	args.in_args[0].value = &inarg;
1210	args.out_numargs = 1;
1211	args.out_args[0].size = sizeof(outarg);
1212	args.out_args[0].value = &outarg;
1213	err = fuse_simple_request(fm, &args);
1214	if (err)
1215		return err;
1216
1217	sx = &outarg.stat;
1218	if (((sx->mask & STATX_SIZE) && !fuse_valid_size(sx->size)) ||
1219	    ((sx->mask & STATX_TYPE) && (!fuse_valid_type(sx->mode) ||
1220					 inode_wrong_type(inode, sx->mode)))) {
1221		fuse_make_bad(inode);
1222		return -EIO;
1223	}
1224
1225	fuse_statx_to_attr(&outarg.stat, &attr);
1226	if ((sx->mask & STATX_BASIC_STATS) == STATX_BASIC_STATS) {
1227		fuse_change_attributes(inode, &attr, &outarg.stat,
1228				       ATTR_TIMEOUT(&outarg), attr_version);
1229	}
1230
1231	if (stat) {
1232		stat->result_mask = sx->mask & (STATX_BASIC_STATS | STATX_BTIME);
1233		stat->btime.tv_sec = sx->btime.tv_sec;
1234		stat->btime.tv_nsec = min_t(u32, sx->btime.tv_nsec, NSEC_PER_SEC - 1);
1235		fuse_fillattr(inode, &attr, stat);
1236		stat->result_mask |= STATX_TYPE;
1237	}
1238
1239	return 0;
1240}
1241
1242static int fuse_do_getattr(struct inode *inode, struct kstat *stat,
1243			   struct file *file)
1244{
1245	int err;
1246	struct fuse_getattr_in inarg;
1247	struct fuse_attr_out outarg;
1248	struct fuse_mount *fm = get_fuse_mount(inode);
1249	FUSE_ARGS(args);
1250	u64 attr_version;
1251
1252	attr_version = fuse_get_attr_version(fm->fc);
1253
1254	memset(&inarg, 0, sizeof(inarg));
1255	memset(&outarg, 0, sizeof(outarg));
1256	/* Directories have separate file-handle space */
1257	if (file && S_ISREG(inode->i_mode)) {
1258		struct fuse_file *ff = file->private_data;
1259
1260		inarg.getattr_flags |= FUSE_GETATTR_FH;
1261		inarg.fh = ff->fh;
1262	}
1263	args.opcode = FUSE_GETATTR;
1264	args.nodeid = get_node_id(inode);
1265	args.in_numargs = 1;
1266	args.in_args[0].size = sizeof(inarg);
1267	args.in_args[0].value = &inarg;
1268	args.out_numargs = 1;
1269	args.out_args[0].size = sizeof(outarg);
1270	args.out_args[0].value = &outarg;
1271	err = fuse_simple_request(fm, &args);
1272	if (!err) {
1273		if (fuse_invalid_attr(&outarg.attr) ||
1274		    inode_wrong_type(inode, outarg.attr.mode)) {
1275			fuse_make_bad(inode);
1276			err = -EIO;
1277		} else {
1278			fuse_change_attributes(inode, &outarg.attr, NULL,
1279					       ATTR_TIMEOUT(&outarg),
1280					       attr_version);
1281			if (stat)
1282				fuse_fillattr(inode, &outarg.attr, stat);
1283		}
1284	}
1285	return err;
1286}
1287
1288static int fuse_update_get_attr(struct inode *inode, struct file *file,
1289				struct kstat *stat, u32 request_mask,
1290				unsigned int flags)
1291{
1292	struct fuse_inode *fi = get_fuse_inode(inode);
1293	struct fuse_conn *fc = get_fuse_conn(inode);
1294	int err = 0;
1295	bool sync;
1296	u32 inval_mask = READ_ONCE(fi->inval_mask);
1297	u32 cache_mask = fuse_get_cache_mask(inode);
1298
1299
1300	/* FUSE only supports basic stats and possibly btime */
1301	request_mask &= STATX_BASIC_STATS | STATX_BTIME;
1302retry:
1303	if (fc->no_statx)
1304		request_mask &= STATX_BASIC_STATS;
1305
1306	if (!request_mask)
1307		sync = false;
1308	else if (flags & AT_STATX_FORCE_SYNC)
1309		sync = true;
1310	else if (flags & AT_STATX_DONT_SYNC)
1311		sync = false;
1312	else if (request_mask & inval_mask & ~cache_mask)
1313		sync = true;
1314	else
1315		sync = time_before64(fi->i_time, get_jiffies_64());
1316
1317	if (sync) {
1318		forget_all_cached_acls(inode);
1319		/* Try statx if BTIME is requested */
1320		if (!fc->no_statx && (request_mask & ~STATX_BASIC_STATS)) {
1321			err = fuse_do_statx(inode, file, stat);
1322			if (err == -ENOSYS) {
1323				fc->no_statx = 1;
1324				err = 0;
1325				goto retry;
1326			}
1327		} else {
1328			err = fuse_do_getattr(inode, stat, file);
1329		}
1330	} else if (stat) {
1331		generic_fillattr(&nop_mnt_idmap, request_mask, inode, stat);
1332		stat->mode = fi->orig_i_mode;
1333		stat->ino = fi->orig_ino;
1334		if (test_bit(FUSE_I_BTIME, &fi->state)) {
1335			stat->btime = fi->i_btime;
1336			stat->result_mask |= STATX_BTIME;
1337		}
1338	}
1339
1340	return err;
1341}
1342
1343int fuse_update_attributes(struct inode *inode, struct file *file, u32 mask)
1344{
1345	return fuse_update_get_attr(inode, file, NULL, mask, 0);
1346}
1347
1348int fuse_reverse_inval_entry(struct fuse_conn *fc, u64 parent_nodeid,
1349			     u64 child_nodeid, struct qstr *name, u32 flags)
1350{
1351	int err = -ENOTDIR;
1352	struct inode *parent;
1353	struct dentry *dir;
1354	struct dentry *entry;
1355
1356	parent = fuse_ilookup(fc, parent_nodeid, NULL);
1357	if (!parent)
1358		return -ENOENT;
1359
1360	inode_lock_nested(parent, I_MUTEX_PARENT);
1361	if (!S_ISDIR(parent->i_mode))
1362		goto unlock;
1363
1364	err = -ENOENT;
1365	dir = d_find_alias(parent);
1366	if (!dir)
1367		goto unlock;
1368
1369	name->hash = full_name_hash(dir, name->name, name->len);
1370	entry = d_lookup(dir, name);
1371	dput(dir);
1372	if (!entry)
1373		goto unlock;
1374
1375	fuse_dir_changed(parent);
1376	if (!(flags & FUSE_EXPIRE_ONLY))
1377		d_invalidate(entry);
1378	fuse_invalidate_entry_cache(entry);
1379
1380	if (child_nodeid != 0 && d_really_is_positive(entry)) {
1381		inode_lock(d_inode(entry));
1382		if (get_node_id(d_inode(entry)) != child_nodeid) {
1383			err = -ENOENT;
1384			goto badentry;
1385		}
1386		if (d_mountpoint(entry)) {
1387			err = -EBUSY;
1388			goto badentry;
1389		}
1390		if (d_is_dir(entry)) {
1391			shrink_dcache_parent(entry);
1392			if (!simple_empty(entry)) {
1393				err = -ENOTEMPTY;
1394				goto badentry;
1395			}
1396			d_inode(entry)->i_flags |= S_DEAD;
1397		}
1398		dont_mount(entry);
1399		clear_nlink(d_inode(entry));
1400		err = 0;
1401 badentry:
1402		inode_unlock(d_inode(entry));
1403		if (!err)
1404			d_delete(entry);
1405	} else {
1406		err = 0;
1407	}
1408	dput(entry);
1409
1410 unlock:
1411	inode_unlock(parent);
1412	iput(parent);
1413	return err;
1414}
1415
1416static inline bool fuse_permissible_uidgid(struct fuse_conn *fc)
1417{
1418	const struct cred *cred = current_cred();
1419
1420	return (uid_eq(cred->euid, fc->user_id) &&
1421		uid_eq(cred->suid, fc->user_id) &&
1422		uid_eq(cred->uid,  fc->user_id) &&
1423		gid_eq(cred->egid, fc->group_id) &&
1424		gid_eq(cred->sgid, fc->group_id) &&
1425		gid_eq(cred->gid,  fc->group_id));
1426}
1427
1428/*
1429 * Calling into a user-controlled filesystem gives the filesystem
1430 * daemon ptrace-like capabilities over the current process.  This
1431 * means, that the filesystem daemon is able to record the exact
1432 * filesystem operations performed, and can also control the behavior
1433 * of the requester process in otherwise impossible ways.  For example
1434 * it can delay the operation for arbitrary length of time allowing
1435 * DoS against the requester.
1436 *
1437 * For this reason only those processes can call into the filesystem,
1438 * for which the owner of the mount has ptrace privilege.  This
1439 * excludes processes started by other users, suid or sgid processes.
1440 */
1441bool fuse_allow_current_process(struct fuse_conn *fc)
1442{
1443	bool allow;
1444
1445	if (fc->allow_other)
1446		allow = current_in_userns(fc->user_ns);
1447	else
1448		allow = fuse_permissible_uidgid(fc);
1449
1450	if (!allow && allow_sys_admin_access && capable(CAP_SYS_ADMIN))
1451		allow = true;
1452
1453	return allow;
1454}
1455
1456static int fuse_access(struct inode *inode, int mask)
1457{
1458	struct fuse_mount *fm = get_fuse_mount(inode);
1459	FUSE_ARGS(args);
1460	struct fuse_access_in inarg;
1461	int err;
1462
1463	BUG_ON(mask & MAY_NOT_BLOCK);
1464
1465	if (fm->fc->no_access)
1466		return 0;
1467
1468	memset(&inarg, 0, sizeof(inarg));
1469	inarg.mask = mask & (MAY_READ | MAY_WRITE | MAY_EXEC);
1470	args.opcode = FUSE_ACCESS;
1471	args.nodeid = get_node_id(inode);
1472	args.in_numargs = 1;
1473	args.in_args[0].size = sizeof(inarg);
1474	args.in_args[0].value = &inarg;
1475	err = fuse_simple_request(fm, &args);
1476	if (err == -ENOSYS) {
1477		fm->fc->no_access = 1;
1478		err = 0;
1479	}
1480	return err;
1481}
1482
1483static int fuse_perm_getattr(struct inode *inode, int mask)
1484{
1485	if (mask & MAY_NOT_BLOCK)
1486		return -ECHILD;
1487
1488	forget_all_cached_acls(inode);
1489	return fuse_do_getattr(inode, NULL, NULL);
1490}
1491
1492/*
1493 * Check permission.  The two basic access models of FUSE are:
1494 *
1495 * 1) Local access checking ('default_permissions' mount option) based
1496 * on file mode.  This is the plain old disk filesystem permission
1497 * model.
1498 *
1499 * 2) "Remote" access checking, where server is responsible for
1500 * checking permission in each inode operation.  An exception to this
1501 * is if ->permission() was invoked from sys_access() in which case an
1502 * access request is sent.  Execute permission is still checked
1503 * locally based on file mode.
1504 */
1505static int fuse_permission(struct mnt_idmap *idmap,
1506			   struct inode *inode, int mask)
1507{
1508	struct fuse_conn *fc = get_fuse_conn(inode);
1509	bool refreshed = false;
1510	int err = 0;
1511
1512	if (fuse_is_bad(inode))
1513		return -EIO;
1514
1515	if (!fuse_allow_current_process(fc))
1516		return -EACCES;
1517
1518	/*
1519	 * If attributes are needed, refresh them before proceeding
1520	 */
1521	if (fc->default_permissions ||
1522	    ((mask & MAY_EXEC) && S_ISREG(inode->i_mode))) {
1523		struct fuse_inode *fi = get_fuse_inode(inode);
1524		u32 perm_mask = STATX_MODE | STATX_UID | STATX_GID;
1525
1526		if (perm_mask & READ_ONCE(fi->inval_mask) ||
1527		    time_before64(fi->i_time, get_jiffies_64())) {
1528			refreshed = true;
1529
1530			err = fuse_perm_getattr(inode, mask);
1531			if (err)
1532				return err;
1533		}
1534	}
1535
1536	if (fc->default_permissions) {
1537		err = generic_permission(&nop_mnt_idmap, inode, mask);
1538
1539		/* If permission is denied, try to refresh file
1540		   attributes.  This is also needed, because the root
1541		   node will at first have no permissions */
1542		if (err == -EACCES && !refreshed) {
1543			err = fuse_perm_getattr(inode, mask);
1544			if (!err)
1545				err = generic_permission(&nop_mnt_idmap,
1546							 inode, mask);
1547		}
1548
1549		/* Note: the opposite of the above test does not
1550		   exist.  So if permissions are revoked this won't be
1551		   noticed immediately, only after the attribute
1552		   timeout has expired */
1553	} else if (mask & (MAY_ACCESS | MAY_CHDIR)) {
1554		err = fuse_access(inode, mask);
1555	} else if ((mask & MAY_EXEC) && S_ISREG(inode->i_mode)) {
1556		if (!(inode->i_mode & S_IXUGO)) {
1557			if (refreshed)
1558				return -EACCES;
1559
1560			err = fuse_perm_getattr(inode, mask);
1561			if (!err && !(inode->i_mode & S_IXUGO))
1562				return -EACCES;
1563		}
1564	}
1565	return err;
1566}
1567
1568static int fuse_readlink_page(struct inode *inode, struct page *page)
1569{
1570	struct fuse_mount *fm = get_fuse_mount(inode);
1571	struct fuse_page_desc desc = { .length = PAGE_SIZE - 1 };
1572	struct fuse_args_pages ap = {
1573		.num_pages = 1,
1574		.pages = &page,
1575		.descs = &desc,
1576	};
1577	char *link;
1578	ssize_t res;
1579
1580	ap.args.opcode = FUSE_READLINK;
1581	ap.args.nodeid = get_node_id(inode);
1582	ap.args.out_pages = true;
1583	ap.args.out_argvar = true;
1584	ap.args.page_zeroing = true;
1585	ap.args.out_numargs = 1;
1586	ap.args.out_args[0].size = desc.length;
1587	res = fuse_simple_request(fm, &ap.args);
1588
1589	fuse_invalidate_atime(inode);
1590
1591	if (res < 0)
1592		return res;
1593
1594	if (WARN_ON(res >= PAGE_SIZE))
1595		return -EIO;
1596
1597	link = page_address(page);
1598	link[res] = '\0';
1599
1600	return 0;
1601}
1602
1603static const char *fuse_get_link(struct dentry *dentry, struct inode *inode,
1604				 struct delayed_call *callback)
1605{
1606	struct fuse_conn *fc = get_fuse_conn(inode);
1607	struct page *page;
1608	int err;
1609
1610	err = -EIO;
1611	if (fuse_is_bad(inode))
1612		goto out_err;
1613
1614	if (fc->cache_symlinks)
1615		return page_get_link(dentry, inode, callback);
1616
1617	err = -ECHILD;
1618	if (!dentry)
1619		goto out_err;
1620
1621	page = alloc_page(GFP_KERNEL);
1622	err = -ENOMEM;
1623	if (!page)
1624		goto out_err;
1625
1626	err = fuse_readlink_page(inode, page);
1627	if (err) {
1628		__free_page(page);
1629		goto out_err;
1630	}
1631
1632	set_delayed_call(callback, page_put_link, page);
1633
1634	return page_address(page);
1635
1636out_err:
1637	return ERR_PTR(err);
1638}
1639
1640static int fuse_dir_open(struct inode *inode, struct file *file)
1641{
1642	struct fuse_mount *fm = get_fuse_mount(inode);
1643	int err;
1644
1645	if (fuse_is_bad(inode))
1646		return -EIO;
1647
1648	err = generic_file_open(inode, file);
1649	if (err)
1650		return err;
1651
1652	err = fuse_do_open(fm, get_node_id(inode), file, true);
1653	if (!err) {
1654		struct fuse_file *ff = file->private_data;
1655
1656		/*
1657		 * Keep handling FOPEN_STREAM and FOPEN_NONSEEKABLE for
1658		 * directories for backward compatibility, though it's unlikely
1659		 * to be useful.
1660		 */
1661		if (ff->open_flags & (FOPEN_STREAM | FOPEN_NONSEEKABLE))
1662			nonseekable_open(inode, file);
1663	}
1664
1665	return err;
1666}
1667
1668static int fuse_dir_release(struct inode *inode, struct file *file)
1669{
1670	fuse_release_common(file, true);
1671
1672	return 0;
1673}
1674
1675static int fuse_dir_fsync(struct file *file, loff_t start, loff_t end,
1676			  int datasync)
1677{
1678	struct inode *inode = file->f_mapping->host;
1679	struct fuse_conn *fc = get_fuse_conn(inode);
1680	int err;
1681
1682	if (fuse_is_bad(inode))
1683		return -EIO;
1684
1685	if (fc->no_fsyncdir)
1686		return 0;
1687
1688	inode_lock(inode);
1689	err = fuse_fsync_common(file, start, end, datasync, FUSE_FSYNCDIR);
1690	if (err == -ENOSYS) {
1691		fc->no_fsyncdir = 1;
1692		err = 0;
1693	}
1694	inode_unlock(inode);
1695
1696	return err;
1697}
1698
1699static long fuse_dir_ioctl(struct file *file, unsigned int cmd,
1700			    unsigned long arg)
1701{
1702	struct fuse_conn *fc = get_fuse_conn(file->f_mapping->host);
1703
1704	/* FUSE_IOCTL_DIR only supported for API version >= 7.18 */
1705	if (fc->minor < 18)
1706		return -ENOTTY;
1707
1708	return fuse_ioctl_common(file, cmd, arg, FUSE_IOCTL_DIR);
1709}
1710
1711static long fuse_dir_compat_ioctl(struct file *file, unsigned int cmd,
1712				   unsigned long arg)
1713{
1714	struct fuse_conn *fc = get_fuse_conn(file->f_mapping->host);
1715
1716	if (fc->minor < 18)
1717		return -ENOTTY;
1718
1719	return fuse_ioctl_common(file, cmd, arg,
1720				 FUSE_IOCTL_COMPAT | FUSE_IOCTL_DIR);
1721}
1722
1723static bool update_mtime(unsigned ivalid, bool trust_local_mtime)
1724{
1725	/* Always update if mtime is explicitly set  */
1726	if (ivalid & ATTR_MTIME_SET)
1727		return true;
1728
1729	/* Or if kernel i_mtime is the official one */
1730	if (trust_local_mtime)
1731		return true;
1732
1733	/* If it's an open(O_TRUNC) or an ftruncate(), don't update */
1734	if ((ivalid & ATTR_SIZE) && (ivalid & (ATTR_OPEN | ATTR_FILE)))
1735		return false;
1736
1737	/* In all other cases update */
1738	return true;
1739}
1740
1741static void iattr_to_fattr(struct fuse_conn *fc, struct iattr *iattr,
1742			   struct fuse_setattr_in *arg, bool trust_local_cmtime)
1743{
1744	unsigned ivalid = iattr->ia_valid;
1745
1746	if (ivalid & ATTR_MODE)
1747		arg->valid |= FATTR_MODE,   arg->mode = iattr->ia_mode;
1748	if (ivalid & ATTR_UID)
1749		arg->valid |= FATTR_UID,    arg->uid = from_kuid(fc->user_ns, iattr->ia_uid);
1750	if (ivalid & ATTR_GID)
1751		arg->valid |= FATTR_GID,    arg->gid = from_kgid(fc->user_ns, iattr->ia_gid);
1752	if (ivalid & ATTR_SIZE)
1753		arg->valid |= FATTR_SIZE,   arg->size = iattr->ia_size;
1754	if (ivalid & ATTR_ATIME) {
1755		arg->valid |= FATTR_ATIME;
1756		arg->atime = iattr->ia_atime.tv_sec;
1757		arg->atimensec = iattr->ia_atime.tv_nsec;
1758		if (!(ivalid & ATTR_ATIME_SET))
1759			arg->valid |= FATTR_ATIME_NOW;
1760	}
1761	if ((ivalid & ATTR_MTIME) && update_mtime(ivalid, trust_local_cmtime)) {
1762		arg->valid |= FATTR_MTIME;
1763		arg->mtime = iattr->ia_mtime.tv_sec;
1764		arg->mtimensec = iattr->ia_mtime.tv_nsec;
1765		if (!(ivalid & ATTR_MTIME_SET) && !trust_local_cmtime)
1766			arg->valid |= FATTR_MTIME_NOW;
1767	}
1768	if ((ivalid & ATTR_CTIME) && trust_local_cmtime) {
1769		arg->valid |= FATTR_CTIME;
1770		arg->ctime = iattr->ia_ctime.tv_sec;
1771		arg->ctimensec = iattr->ia_ctime.tv_nsec;
1772	}
1773}
1774
1775/*
1776 * Prevent concurrent writepages on inode
1777 *
1778 * This is done by adding a negative bias to the inode write counter
1779 * and waiting for all pending writes to finish.
1780 */
1781void fuse_set_nowrite(struct inode *inode)
1782{
1783	struct fuse_inode *fi = get_fuse_inode(inode);
1784
1785	BUG_ON(!inode_is_locked(inode));
1786
1787	spin_lock(&fi->lock);
1788	BUG_ON(fi->writectr < 0);
1789	fi->writectr += FUSE_NOWRITE;
1790	spin_unlock(&fi->lock);
1791	wait_event(fi->page_waitq, fi->writectr == FUSE_NOWRITE);
1792}
1793
1794/*
1795 * Allow writepages on inode
1796 *
1797 * Remove the bias from the writecounter and send any queued
1798 * writepages.
1799 */
1800static void __fuse_release_nowrite(struct inode *inode)
1801{
1802	struct fuse_inode *fi = get_fuse_inode(inode);
1803
1804	BUG_ON(fi->writectr != FUSE_NOWRITE);
1805	fi->writectr = 0;
1806	fuse_flush_writepages(inode);
1807}
1808
1809void fuse_release_nowrite(struct inode *inode)
1810{
1811	struct fuse_inode *fi = get_fuse_inode(inode);
1812
1813	spin_lock(&fi->lock);
1814	__fuse_release_nowrite(inode);
1815	spin_unlock(&fi->lock);
1816}
1817
1818static void fuse_setattr_fill(struct fuse_conn *fc, struct fuse_args *args,
1819			      struct inode *inode,
1820			      struct fuse_setattr_in *inarg_p,
1821			      struct fuse_attr_out *outarg_p)
1822{
1823	args->opcode = FUSE_SETATTR;
1824	args->nodeid = get_node_id(inode);
1825	args->in_numargs = 1;
1826	args->in_args[0].size = sizeof(*inarg_p);
1827	args->in_args[0].value = inarg_p;
1828	args->out_numargs = 1;
1829	args->out_args[0].size = sizeof(*outarg_p);
1830	args->out_args[0].value = outarg_p;
1831}
1832
1833/*
1834 * Flush inode->i_mtime to the server
1835 */
1836int fuse_flush_times(struct inode *inode, struct fuse_file *ff)
1837{
1838	struct fuse_mount *fm = get_fuse_mount(inode);
1839	FUSE_ARGS(args);
1840	struct fuse_setattr_in inarg;
1841	struct fuse_attr_out outarg;
1842
1843	memset(&inarg, 0, sizeof(inarg));
1844	memset(&outarg, 0, sizeof(outarg));
1845
1846	inarg.valid = FATTR_MTIME;
1847	inarg.mtime = inode_get_mtime_sec(inode);
1848	inarg.mtimensec = inode_get_mtime_nsec(inode);
1849	if (fm->fc->minor >= 23) {
1850		inarg.valid |= FATTR_CTIME;
1851		inarg.ctime = inode_get_ctime_sec(inode);
1852		inarg.ctimensec = inode_get_ctime_nsec(inode);
1853	}
1854	if (ff) {
1855		inarg.valid |= FATTR_FH;
1856		inarg.fh = ff->fh;
1857	}
1858	fuse_setattr_fill(fm->fc, &args, inode, &inarg, &outarg);
1859
1860	return fuse_simple_request(fm, &args);
1861}
1862
1863/*
1864 * Set attributes, and at the same time refresh them.
1865 *
1866 * Truncation is slightly complicated, because the 'truncate' request
1867 * may fail, in which case we don't want to touch the mapping.
1868 * vmtruncate() doesn't allow for this case, so do the rlimit checking
1869 * and the actual truncation by hand.
1870 */
1871int fuse_do_setattr(struct dentry *dentry, struct iattr *attr,
1872		    struct file *file)
1873{
1874	struct inode *inode = d_inode(dentry);
1875	struct fuse_mount *fm = get_fuse_mount(inode);
1876	struct fuse_conn *fc = fm->fc;
1877	struct fuse_inode *fi = get_fuse_inode(inode);
1878	struct address_space *mapping = inode->i_mapping;
1879	FUSE_ARGS(args);
1880	struct fuse_setattr_in inarg;
1881	struct fuse_attr_out outarg;
1882	bool is_truncate = false;
1883	bool is_wb = fc->writeback_cache && S_ISREG(inode->i_mode);
1884	loff_t oldsize;
1885	int err;
1886	bool trust_local_cmtime = is_wb;
1887	bool fault_blocked = false;
1888
1889	if (!fc->default_permissions)
1890		attr->ia_valid |= ATTR_FORCE;
1891
1892	err = setattr_prepare(&nop_mnt_idmap, dentry, attr);
1893	if (err)
1894		return err;
1895
1896	if (attr->ia_valid & ATTR_SIZE) {
1897		if (WARN_ON(!S_ISREG(inode->i_mode)))
1898			return -EIO;
1899		is_truncate = true;
1900	}
1901
1902	if (FUSE_IS_DAX(inode) && is_truncate) {
1903		filemap_invalidate_lock(mapping);
1904		fault_blocked = true;
1905		err = fuse_dax_break_layouts(inode, 0, 0);
1906		if (err) {
1907			filemap_invalidate_unlock(mapping);
1908			return err;
1909		}
1910	}
1911
1912	if (attr->ia_valid & ATTR_OPEN) {
1913		/* This is coming from open(..., ... | O_TRUNC); */
1914		WARN_ON(!(attr->ia_valid & ATTR_SIZE));
1915		WARN_ON(attr->ia_size != 0);
1916		if (fc->atomic_o_trunc) {
1917			/*
1918			 * No need to send request to userspace, since actual
1919			 * truncation has already been done by OPEN.  But still
1920			 * need to truncate page cache.
1921			 */
1922			i_size_write(inode, 0);
1923			truncate_pagecache(inode, 0);
1924			goto out;
1925		}
1926		file = NULL;
1927	}
1928
1929	/* Flush dirty data/metadata before non-truncate SETATTR */
1930	if (is_wb &&
1931	    attr->ia_valid &
1932			(ATTR_MODE | ATTR_UID | ATTR_GID | ATTR_MTIME_SET |
1933			 ATTR_TIMES_SET)) {
1934		err = write_inode_now(inode, true);
1935		if (err)
1936			return err;
1937
1938		fuse_set_nowrite(inode);
1939		fuse_release_nowrite(inode);
1940	}
1941
1942	if (is_truncate) {
1943		fuse_set_nowrite(inode);
1944		set_bit(FUSE_I_SIZE_UNSTABLE, &fi->state);
1945		if (trust_local_cmtime && attr->ia_size != inode->i_size)
1946			attr->ia_valid |= ATTR_MTIME | ATTR_CTIME;
1947	}
1948
1949	memset(&inarg, 0, sizeof(inarg));
1950	memset(&outarg, 0, sizeof(outarg));
1951	iattr_to_fattr(fc, attr, &inarg, trust_local_cmtime);
1952	if (file) {
1953		struct fuse_file *ff = file->private_data;
1954		inarg.valid |= FATTR_FH;
1955		inarg.fh = ff->fh;
1956	}
1957
1958	/* Kill suid/sgid for non-directory chown unconditionally */
1959	if (fc->handle_killpriv_v2 && !S_ISDIR(inode->i_mode) &&
1960	    attr->ia_valid & (ATTR_UID | ATTR_GID))
1961		inarg.valid |= FATTR_KILL_SUIDGID;
1962
1963	if (attr->ia_valid & ATTR_SIZE) {
1964		/* For mandatory locking in truncate */
1965		inarg.valid |= FATTR_LOCKOWNER;
1966		inarg.lock_owner = fuse_lock_owner_id(fc, current->files);
1967
1968		/* Kill suid/sgid for truncate only if no CAP_FSETID */
1969		if (fc->handle_killpriv_v2 && !capable(CAP_FSETID))
1970			inarg.valid |= FATTR_KILL_SUIDGID;
1971	}
1972	fuse_setattr_fill(fc, &args, inode, &inarg, &outarg);
1973	err = fuse_simple_request(fm, &args);
1974	if (err) {
1975		if (err == -EINTR)
1976			fuse_invalidate_attr(inode);
1977		goto error;
1978	}
1979
1980	if (fuse_invalid_attr(&outarg.attr) ||
1981	    inode_wrong_type(inode, outarg.attr.mode)) {
1982		fuse_make_bad(inode);
1983		err = -EIO;
1984		goto error;
1985	}
1986
1987	spin_lock(&fi->lock);
1988	/* the kernel maintains i_mtime locally */
1989	if (trust_local_cmtime) {
1990		if (attr->ia_valid & ATTR_MTIME)
1991			inode_set_mtime_to_ts(inode, attr->ia_mtime);
1992		if (attr->ia_valid & ATTR_CTIME)
1993			inode_set_ctime_to_ts(inode, attr->ia_ctime);
1994		/* FIXME: clear I_DIRTY_SYNC? */
1995	}
1996
1997	fuse_change_attributes_common(inode, &outarg.attr, NULL,
1998				      ATTR_TIMEOUT(&outarg),
1999				      fuse_get_cache_mask(inode));
2000	oldsize = inode->i_size;
2001	/* see the comment in fuse_change_attributes() */
2002	if (!is_wb || is_truncate)
2003		i_size_write(inode, outarg.attr.size);
2004
2005	if (is_truncate) {
2006		/* NOTE: this may release/reacquire fi->lock */
2007		__fuse_release_nowrite(inode);
2008	}
2009	spin_unlock(&fi->lock);
2010
2011	/*
2012	 * Only call invalidate_inode_pages2() after removing
2013	 * FUSE_NOWRITE, otherwise fuse_launder_folio() would deadlock.
2014	 */
2015	if ((is_truncate || !is_wb) &&
2016	    S_ISREG(inode->i_mode) && oldsize != outarg.attr.size) {
2017		truncate_pagecache(inode, outarg.attr.size);
2018		invalidate_inode_pages2(mapping);
2019	}
2020
2021	clear_bit(FUSE_I_SIZE_UNSTABLE, &fi->state);
2022out:
2023	if (fault_blocked)
2024		filemap_invalidate_unlock(mapping);
2025
2026	return 0;
2027
2028error:
2029	if (is_truncate)
2030		fuse_release_nowrite(inode);
2031
2032	clear_bit(FUSE_I_SIZE_UNSTABLE, &fi->state);
2033
2034	if (fault_blocked)
2035		filemap_invalidate_unlock(mapping);
2036	return err;
2037}
2038
2039static int fuse_setattr(struct mnt_idmap *idmap, struct dentry *entry,
2040			struct iattr *attr)
2041{
2042	struct inode *inode = d_inode(entry);
2043	struct fuse_conn *fc = get_fuse_conn(inode);
2044	struct file *file = (attr->ia_valid & ATTR_FILE) ? attr->ia_file : NULL;
2045	int ret;
2046
2047	if (fuse_is_bad(inode))
2048		return -EIO;
2049
2050	if (!fuse_allow_current_process(get_fuse_conn(inode)))
2051		return -EACCES;
2052
2053	if (attr->ia_valid & (ATTR_KILL_SUID | ATTR_KILL_SGID)) {
2054		attr->ia_valid &= ~(ATTR_KILL_SUID | ATTR_KILL_SGID |
2055				    ATTR_MODE);
2056
2057		/*
2058		 * The only sane way to reliably kill suid/sgid is to do it in
2059		 * the userspace filesystem
2060		 *
2061		 * This should be done on write(), truncate() and chown().
2062		 */
2063		if (!fc->handle_killpriv && !fc->handle_killpriv_v2) {
2064			/*
2065			 * ia_mode calculation may have used stale i_mode.
2066			 * Refresh and recalculate.
2067			 */
2068			ret = fuse_do_getattr(inode, NULL, file);
2069			if (ret)
2070				return ret;
2071
2072			attr->ia_mode = inode->i_mode;
2073			if (inode->i_mode & S_ISUID) {
2074				attr->ia_valid |= ATTR_MODE;
2075				attr->ia_mode &= ~S_ISUID;
2076			}
2077			if ((inode->i_mode & (S_ISGID | S_IXGRP)) == (S_ISGID | S_IXGRP)) {
2078				attr->ia_valid |= ATTR_MODE;
2079				attr->ia_mode &= ~S_ISGID;
2080			}
2081		}
2082	}
2083	if (!attr->ia_valid)
2084		return 0;
2085
2086	ret = fuse_do_setattr(entry, attr, file);
2087	if (!ret) {
2088		/*
2089		 * If filesystem supports acls it may have updated acl xattrs in
2090		 * the filesystem, so forget cached acls for the inode.
2091		 */
2092		if (fc->posix_acl)
2093			forget_all_cached_acls(inode);
2094
2095		/* Directory mode changed, may need to revalidate access */
2096		if (d_is_dir(entry) && (attr->ia_valid & ATTR_MODE))
2097			fuse_invalidate_entry_cache(entry);
2098	}
2099	return ret;
2100}
2101
2102static int fuse_getattr(struct mnt_idmap *idmap,
2103			const struct path *path, struct kstat *stat,
2104			u32 request_mask, unsigned int flags)
2105{
2106	struct inode *inode = d_inode(path->dentry);
2107	struct fuse_conn *fc = get_fuse_conn(inode);
2108
2109	if (fuse_is_bad(inode))
2110		return -EIO;
2111
2112	if (!fuse_allow_current_process(fc)) {
2113		if (!request_mask) {
2114			/*
2115			 * If user explicitly requested *nothing* then don't
2116			 * error out, but return st_dev only.
2117			 */
2118			stat->result_mask = 0;
2119			stat->dev = inode->i_sb->s_dev;
2120			return 0;
2121		}
2122		return -EACCES;
2123	}
2124
2125	return fuse_update_get_attr(inode, NULL, stat, request_mask, flags);
2126}
2127
2128static const struct inode_operations fuse_dir_inode_operations = {
2129	.lookup		= fuse_lookup,
2130	.mkdir		= fuse_mkdir,
2131	.symlink	= fuse_symlink,
2132	.unlink		= fuse_unlink,
2133	.rmdir		= fuse_rmdir,
2134	.rename		= fuse_rename2,
2135	.link		= fuse_link,
2136	.setattr	= fuse_setattr,
2137	.create		= fuse_create,
2138	.atomic_open	= fuse_atomic_open,
2139	.tmpfile	= fuse_tmpfile,
2140	.mknod		= fuse_mknod,
2141	.permission	= fuse_permission,
2142	.getattr	= fuse_getattr,
2143	.listxattr	= fuse_listxattr,
2144	.get_inode_acl	= fuse_get_inode_acl,
2145	.get_acl	= fuse_get_acl,
2146	.set_acl	= fuse_set_acl,
2147	.fileattr_get	= fuse_fileattr_get,
2148	.fileattr_set	= fuse_fileattr_set,
2149};
2150
2151static const struct file_operations fuse_dir_operations = {
2152	.llseek		= generic_file_llseek,
2153	.read		= generic_read_dir,
2154	.iterate_shared	= fuse_readdir,
2155	.open		= fuse_dir_open,
2156	.release	= fuse_dir_release,
2157	.fsync		= fuse_dir_fsync,
2158	.unlocked_ioctl	= fuse_dir_ioctl,
2159	.compat_ioctl	= fuse_dir_compat_ioctl,
2160};
2161
2162static const struct inode_operations fuse_common_inode_operations = {
2163	.setattr	= fuse_setattr,
2164	.permission	= fuse_permission,
2165	.getattr	= fuse_getattr,
2166	.listxattr	= fuse_listxattr,
2167	.get_inode_acl	= fuse_get_inode_acl,
2168	.get_acl	= fuse_get_acl,
2169	.set_acl	= fuse_set_acl,
2170	.fileattr_get	= fuse_fileattr_get,
2171	.fileattr_set	= fuse_fileattr_set,
2172};
2173
2174static const struct inode_operations fuse_symlink_inode_operations = {
2175	.setattr	= fuse_setattr,
2176	.get_link	= fuse_get_link,
2177	.getattr	= fuse_getattr,
2178	.listxattr	= fuse_listxattr,
2179};
2180
2181void fuse_init_common(struct inode *inode)
2182{
2183	inode->i_op = &fuse_common_inode_operations;
2184}
2185
2186void fuse_init_dir(struct inode *inode)
2187{
2188	struct fuse_inode *fi = get_fuse_inode(inode);
2189
2190	inode->i_op = &fuse_dir_inode_operations;
2191	inode->i_fop = &fuse_dir_operations;
2192
2193	spin_lock_init(&fi->rdc.lock);
2194	fi->rdc.cached = false;
2195	fi->rdc.size = 0;
2196	fi->rdc.pos = 0;
2197	fi->rdc.version = 0;
2198}
2199
2200static int fuse_symlink_read_folio(struct file *null, struct folio *folio)
2201{
2202	int err = fuse_readlink_page(folio->mapping->host, &folio->page);
2203
2204	if (!err)
2205		folio_mark_uptodate(folio);
2206
2207	folio_unlock(folio);
2208
2209	return err;
2210}
2211
2212static const struct address_space_operations fuse_symlink_aops = {
2213	.read_folio	= fuse_symlink_read_folio,
2214};
2215
2216void fuse_init_symlink(struct inode *inode)
2217{
2218	inode->i_op = &fuse_symlink_inode_operations;
2219	inode->i_data.a_ops = &fuse_symlink_aops;
2220	inode_nohighmem(inode);
2221}
2222