1// SPDX-License-Identifier: GPL-2.0-only
2/*
3 * Common helpers for stackable filesystems and backing files.
4 *
5 * Forked from fs/overlayfs/file.c.
6 *
7 * Copyright (C) 2017 Red Hat, Inc.
8 * Copyright (C) 2023 CTERA Networks.
9 */
10
11#include <linux/fs.h>
12#include <linux/backing-file.h>
13#include <linux/splice.h>
14#include <linux/mm.h>
15
16#include "internal.h"
17
18/**
19 * backing_file_open - open a backing file for kernel internal use
20 * @user_path:	path that the user reuqested to open
21 * @flags:	open flags
22 * @real_path:	path of the backing file
23 * @cred:	credentials for open
24 *
25 * Open a backing file for a stackable filesystem (e.g., overlayfs).
26 * @user_path may be on the stackable filesystem and @real_path on the
27 * underlying filesystem.  In this case, we want to be able to return the
28 * @user_path of the stackable filesystem. This is done by embedding the
29 * returned file into a container structure that also stores the stacked
30 * file's path, which can be retrieved using backing_file_user_path().
31 */
32struct file *backing_file_open(const struct path *user_path, int flags,
33			       const struct path *real_path,
34			       const struct cred *cred)
35{
36	struct file *f;
37	int error;
38
39	f = alloc_empty_backing_file(flags, cred);
40	if (IS_ERR(f))
41		return f;
42
43	path_get(user_path);
44	*backing_file_user_path(f) = *user_path;
45	error = vfs_open(real_path, f);
46	if (error) {
47		fput(f);
48		f = ERR_PTR(error);
49	}
50
51	return f;
52}
53EXPORT_SYMBOL_GPL(backing_file_open);
54
55struct backing_aio {
56	struct kiocb iocb;
57	refcount_t ref;
58	struct kiocb *orig_iocb;
59	/* used for aio completion */
60	void (*end_write)(struct file *);
61	struct work_struct work;
62	long res;
63};
64
65static struct kmem_cache *backing_aio_cachep;
66
67#define BACKING_IOCB_MASK \
68	(IOCB_NOWAIT | IOCB_HIPRI | IOCB_DSYNC | IOCB_SYNC | IOCB_APPEND)
69
70static rwf_t iocb_to_rw_flags(int flags)
71{
72	return (__force rwf_t)(flags & BACKING_IOCB_MASK);
73}
74
75static void backing_aio_put(struct backing_aio *aio)
76{
77	if (refcount_dec_and_test(&aio->ref)) {
78		fput(aio->iocb.ki_filp);
79		kmem_cache_free(backing_aio_cachep, aio);
80	}
81}
82
83static void backing_aio_cleanup(struct backing_aio *aio, long res)
84{
85	struct kiocb *iocb = &aio->iocb;
86	struct kiocb *orig_iocb = aio->orig_iocb;
87
88	if (aio->end_write)
89		aio->end_write(orig_iocb->ki_filp);
90
91	orig_iocb->ki_pos = iocb->ki_pos;
92	backing_aio_put(aio);
93}
94
95static void backing_aio_rw_complete(struct kiocb *iocb, long res)
96{
97	struct backing_aio *aio = container_of(iocb, struct backing_aio, iocb);
98	struct kiocb *orig_iocb = aio->orig_iocb;
99
100	if (iocb->ki_flags & IOCB_WRITE)
101		kiocb_end_write(iocb);
102
103	backing_aio_cleanup(aio, res);
104	orig_iocb->ki_complete(orig_iocb, res);
105}
106
107static void backing_aio_complete_work(struct work_struct *work)
108{
109	struct backing_aio *aio = container_of(work, struct backing_aio, work);
110
111	backing_aio_rw_complete(&aio->iocb, aio->res);
112}
113
114static void backing_aio_queue_completion(struct kiocb *iocb, long res)
115{
116	struct backing_aio *aio = container_of(iocb, struct backing_aio, iocb);
117
118	/*
119	 * Punt to a work queue to serialize updates of mtime/size.
120	 */
121	aio->res = res;
122	INIT_WORK(&aio->work, backing_aio_complete_work);
123	queue_work(file_inode(aio->orig_iocb->ki_filp)->i_sb->s_dio_done_wq,
124		   &aio->work);
125}
126
127static int backing_aio_init_wq(struct kiocb *iocb)
128{
129	struct super_block *sb = file_inode(iocb->ki_filp)->i_sb;
130
131	if (sb->s_dio_done_wq)
132		return 0;
133
134	return sb_init_dio_done_wq(sb);
135}
136
137
138ssize_t backing_file_read_iter(struct file *file, struct iov_iter *iter,
139			       struct kiocb *iocb, int flags,
140			       struct backing_file_ctx *ctx)
141{
142	struct backing_aio *aio = NULL;
143	const struct cred *old_cred;
144	ssize_t ret;
145
146	if (WARN_ON_ONCE(!(file->f_mode & FMODE_BACKING)))
147		return -EIO;
148
149	if (!iov_iter_count(iter))
150		return 0;
151
152	if (iocb->ki_flags & IOCB_DIRECT &&
153	    !(file->f_mode & FMODE_CAN_ODIRECT))
154		return -EINVAL;
155
156	old_cred = override_creds(ctx->cred);
157	if (is_sync_kiocb(iocb)) {
158		rwf_t rwf = iocb_to_rw_flags(flags);
159
160		ret = vfs_iter_read(file, iter, &iocb->ki_pos, rwf);
161	} else {
162		ret = -ENOMEM;
163		aio = kmem_cache_zalloc(backing_aio_cachep, GFP_KERNEL);
164		if (!aio)
165			goto out;
166
167		aio->orig_iocb = iocb;
168		kiocb_clone(&aio->iocb, iocb, get_file(file));
169		aio->iocb.ki_complete = backing_aio_rw_complete;
170		refcount_set(&aio->ref, 2);
171		ret = vfs_iocb_iter_read(file, &aio->iocb, iter);
172		backing_aio_put(aio);
173		if (ret != -EIOCBQUEUED)
174			backing_aio_cleanup(aio, ret);
175	}
176out:
177	revert_creds(old_cred);
178
179	if (ctx->accessed)
180		ctx->accessed(ctx->user_file);
181
182	return ret;
183}
184EXPORT_SYMBOL_GPL(backing_file_read_iter);
185
186ssize_t backing_file_write_iter(struct file *file, struct iov_iter *iter,
187				struct kiocb *iocb, int flags,
188				struct backing_file_ctx *ctx)
189{
190	const struct cred *old_cred;
191	ssize_t ret;
192
193	if (WARN_ON_ONCE(!(file->f_mode & FMODE_BACKING)))
194		return -EIO;
195
196	if (!iov_iter_count(iter))
197		return 0;
198
199	ret = file_remove_privs(ctx->user_file);
200	if (ret)
201		return ret;
202
203	if (iocb->ki_flags & IOCB_DIRECT &&
204	    !(file->f_mode & FMODE_CAN_ODIRECT))
205		return -EINVAL;
206
207	/*
208	 * Stacked filesystems don't support deferred completions, don't copy
209	 * this property in case it is set by the issuer.
210	 */
211	flags &= ~IOCB_DIO_CALLER_COMP;
212
213	old_cred = override_creds(ctx->cred);
214	if (is_sync_kiocb(iocb)) {
215		rwf_t rwf = iocb_to_rw_flags(flags);
216
217		ret = vfs_iter_write(file, iter, &iocb->ki_pos, rwf);
218		if (ctx->end_write)
219			ctx->end_write(ctx->user_file);
220	} else {
221		struct backing_aio *aio;
222
223		ret = backing_aio_init_wq(iocb);
224		if (ret)
225			goto out;
226
227		ret = -ENOMEM;
228		aio = kmem_cache_zalloc(backing_aio_cachep, GFP_KERNEL);
229		if (!aio)
230			goto out;
231
232		aio->orig_iocb = iocb;
233		aio->end_write = ctx->end_write;
234		kiocb_clone(&aio->iocb, iocb, get_file(file));
235		aio->iocb.ki_flags = flags;
236		aio->iocb.ki_complete = backing_aio_queue_completion;
237		refcount_set(&aio->ref, 2);
238		ret = vfs_iocb_iter_write(file, &aio->iocb, iter);
239		backing_aio_put(aio);
240		if (ret != -EIOCBQUEUED)
241			backing_aio_cleanup(aio, ret);
242	}
243out:
244	revert_creds(old_cred);
245
246	return ret;
247}
248EXPORT_SYMBOL_GPL(backing_file_write_iter);
249
250ssize_t backing_file_splice_read(struct file *in, loff_t *ppos,
251				 struct pipe_inode_info *pipe, size_t len,
252				 unsigned int flags,
253				 struct backing_file_ctx *ctx)
254{
255	const struct cred *old_cred;
256	ssize_t ret;
257
258	if (WARN_ON_ONCE(!(in->f_mode & FMODE_BACKING)))
259		return -EIO;
260
261	old_cred = override_creds(ctx->cred);
262	ret = vfs_splice_read(in, ppos, pipe, len, flags);
263	revert_creds(old_cred);
264
265	if (ctx->accessed)
266		ctx->accessed(ctx->user_file);
267
268	return ret;
269}
270EXPORT_SYMBOL_GPL(backing_file_splice_read);
271
272ssize_t backing_file_splice_write(struct pipe_inode_info *pipe,
273				  struct file *out, loff_t *ppos, size_t len,
274				  unsigned int flags,
275				  struct backing_file_ctx *ctx)
276{
277	const struct cred *old_cred;
278	ssize_t ret;
279
280	if (WARN_ON_ONCE(!(out->f_mode & FMODE_BACKING)))
281		return -EIO;
282
283	ret = file_remove_privs(ctx->user_file);
284	if (ret)
285		return ret;
286
287	old_cred = override_creds(ctx->cred);
288	file_start_write(out);
289	ret = iter_file_splice_write(pipe, out, ppos, len, flags);
290	file_end_write(out);
291	revert_creds(old_cred);
292
293	if (ctx->end_write)
294		ctx->end_write(ctx->user_file);
295
296	return ret;
297}
298EXPORT_SYMBOL_GPL(backing_file_splice_write);
299
300int backing_file_mmap(struct file *file, struct vm_area_struct *vma,
301		      struct backing_file_ctx *ctx)
302{
303	const struct cred *old_cred;
304	int ret;
305
306	if (WARN_ON_ONCE(!(file->f_mode & FMODE_BACKING)) ||
307	    WARN_ON_ONCE(ctx->user_file != vma->vm_file))
308		return -EIO;
309
310	if (!file->f_op->mmap)
311		return -ENODEV;
312
313	vma_set_file(vma, file);
314
315	old_cred = override_creds(ctx->cred);
316	ret = call_mmap(vma->vm_file, vma);
317	revert_creds(old_cred);
318
319	if (ctx->accessed)
320		ctx->accessed(ctx->user_file);
321
322	return ret;
323}
324EXPORT_SYMBOL_GPL(backing_file_mmap);
325
326static int __init backing_aio_init(void)
327{
328	backing_aio_cachep = KMEM_CACHE(backing_aio, SLAB_HWCACHE_ALIGN);
329	if (!backing_aio_cachep)
330		return -ENOMEM;
331
332	return 0;
333}
334fs_initcall(backing_aio_init);
335