1// SPDX-License-Identifier: GPL-2.0-only
2/*
3 * Copyright (C) 2017 Red Hat, Inc.
4 */
5
6#include <linux/cred.h>
7#include <linux/file.h>
8#include <linux/mount.h>
9#include <linux/xattr.h>
10#include <linux/uio.h>
11#include <linux/uaccess.h>
12#include <linux/security.h>
13#include <linux/fs.h>
14#include <linux/backing-file.h>
15#include "overlayfs.h"
16
/*
 * Classify @realinode relative to the overlay @inode with a single
 * character (used in the debug message emitted when a real file is opened):
 *
 *   'l' - @realinode is not the upper inode of @inode
 *   'u' - @realinode is the upper inode and ovl_has_upperdata() is true
 *   'm' - @realinode is the upper inode and ovl_has_upperdata() is false
 */
static char ovl_whatisit(struct inode *inode, struct inode *realinode)
{
	if (realinode != ovl_inode_upper(inode))
		return 'l';

	return ovl_has_upperdata(inode) ? 'u' : 'm';
}
26
27/* No atime modification on underlying */
28#define OVL_OPEN_FLAGS (O_NOATIME)
29
30static struct file *ovl_open_realfile(const struct file *file,
31				      const struct path *realpath)
32{
33	struct inode *realinode = d_inode(realpath->dentry);
34	struct inode *inode = file_inode(file);
35	struct mnt_idmap *real_idmap;
36	struct file *realfile;
37	const struct cred *old_cred;
38	int flags = file->f_flags | OVL_OPEN_FLAGS;
39	int acc_mode = ACC_MODE(flags);
40	int err;
41
42	if (flags & O_APPEND)
43		acc_mode |= MAY_APPEND;
44
45	old_cred = ovl_override_creds(inode->i_sb);
46	real_idmap = mnt_idmap(realpath->mnt);
47	err = inode_permission(real_idmap, realinode, MAY_OPEN | acc_mode);
48	if (err) {
49		realfile = ERR_PTR(err);
50	} else {
51		if (!inode_owner_or_capable(real_idmap, realinode))
52			flags &= ~O_NOATIME;
53
54		realfile = backing_file_open(&file->f_path, flags, realpath,
55					     current_cred());
56	}
57	revert_creds(old_cred);
58
59	pr_debug("open(%p[%pD2/%c], 0%o) -> (%p, 0%o)\n",
60		 file, file, ovl_whatisit(inode, realinode), file->f_flags,
61		 realfile, IS_ERR(realfile) ? 0 : realfile->f_flags);
62
63	return realfile;
64}
65
66#define OVL_SETFL_MASK (O_APPEND | O_NONBLOCK | O_NDELAY | O_DIRECT)
67
68static int ovl_change_flags(struct file *file, unsigned int flags)
69{
70	struct inode *inode = file_inode(file);
71	int err;
72
73	flags &= OVL_SETFL_MASK;
74
75	if (((flags ^ file->f_flags) & O_APPEND) && IS_APPEND(inode))
76		return -EPERM;
77
78	if ((flags & O_DIRECT) && !(file->f_mode & FMODE_CAN_ODIRECT))
79		return -EINVAL;
80
81	if (file->f_op->check_flags) {
82		err = file->f_op->check_flags(flags);
83		if (err)
84			return err;
85	}
86
87	spin_lock(&file->f_lock);
88	file->f_flags = (file->f_flags & ~OVL_SETFL_MASK) | flags;
89	file->f_iocb_flags = iocb_flags(file);
90	spin_unlock(&file->f_lock);
91
92	return 0;
93}
94
95static int ovl_real_fdget_meta(const struct file *file, struct fd *real,
96			       bool allow_meta)
97{
98	struct dentry *dentry = file_dentry(file);
99	struct path realpath;
100	int err;
101
102	real->flags = 0;
103	real->file = file->private_data;
104
105	if (allow_meta) {
106		ovl_path_real(dentry, &realpath);
107	} else {
108		/* lazy lookup and verify of lowerdata */
109		err = ovl_verify_lowerdata(dentry);
110		if (err)
111			return err;
112
113		ovl_path_realdata(dentry, &realpath);
114	}
115	if (!realpath.dentry)
116		return -EIO;
117
118	/* Has it been copied up since we'd opened it? */
119	if (unlikely(file_inode(real->file) != d_inode(realpath.dentry))) {
120		real->flags = FDPUT_FPUT;
121		real->file = ovl_open_realfile(file, &realpath);
122
123		return PTR_ERR_OR_ZERO(real->file);
124	}
125
126	/* Did the flags change since open? */
127	if (unlikely((file->f_flags ^ real->file->f_flags) & ~OVL_OPEN_FLAGS))
128		return ovl_change_flags(real->file, file->f_flags);
129
130	return 0;
131}
132
133static int ovl_real_fdget(const struct file *file, struct fd *real)
134{
135	if (d_is_dir(file_dentry(file))) {
136		real->flags = 0;
137		real->file = ovl_dir_real_file(file, false);
138
139		return PTR_ERR_OR_ZERO(real->file);
140	}
141
142	return ovl_real_fdget_meta(file, real, false);
143}
144
145static int ovl_open(struct inode *inode, struct file *file)
146{
147	struct dentry *dentry = file_dentry(file);
148	struct file *realfile;
149	struct path realpath;
150	int err;
151
152	/* lazy lookup and verify lowerdata */
153	err = ovl_verify_lowerdata(dentry);
154	if (err)
155		return err;
156
157	err = ovl_maybe_copy_up(dentry, file->f_flags);
158	if (err)
159		return err;
160
161	/* No longer need these flags, so don't pass them on to underlying fs */
162	file->f_flags &= ~(O_CREAT | O_EXCL | O_NOCTTY | O_TRUNC);
163
164	ovl_path_realdata(dentry, &realpath);
165	if (!realpath.dentry)
166		return -EIO;
167
168	realfile = ovl_open_realfile(file, &realpath);
169	if (IS_ERR(realfile))
170		return PTR_ERR(realfile);
171
172	file->private_data = realfile;
173
174	return 0;
175}
176
177static int ovl_release(struct inode *inode, struct file *file)
178{
179	fput(file->private_data);
180
181	return 0;
182}
183
184static loff_t ovl_llseek(struct file *file, loff_t offset, int whence)
185{
186	struct inode *inode = file_inode(file);
187	struct fd real;
188	const struct cred *old_cred;
189	loff_t ret;
190
191	/*
192	 * The two special cases below do not need to involve real fs,
193	 * so we can optimizing concurrent callers.
194	 */
195	if (offset == 0) {
196		if (whence == SEEK_CUR)
197			return file->f_pos;
198
199		if (whence == SEEK_SET)
200			return vfs_setpos(file, 0, 0);
201	}
202
203	ret = ovl_real_fdget(file, &real);
204	if (ret)
205		return ret;
206
207	/*
208	 * Overlay file f_pos is the master copy that is preserved
209	 * through copy up and modified on read/write, but only real
210	 * fs knows how to SEEK_HOLE/SEEK_DATA and real fs may impose
211	 * limitations that are more strict than ->s_maxbytes for specific
212	 * files, so we use the real file to perform seeks.
213	 */
214	ovl_inode_lock(inode);
215	real.file->f_pos = file->f_pos;
216
217	old_cred = ovl_override_creds(inode->i_sb);
218	ret = vfs_llseek(real.file, offset, whence);
219	revert_creds(old_cred);
220
221	file->f_pos = real.file->f_pos;
222	ovl_inode_unlock(inode);
223
224	fdput(real);
225
226	return ret;
227}
228
/*
 * Called after the real file was modified: refresh the overlay inode's
 * attributes (size/mtime) via ovl_copyattr().
 */
static void ovl_file_modified(struct file *file)
{
	struct inode *inode = file_inode(file);

	ovl_copyattr(inode);
}
234
235static void ovl_file_accessed(struct file *file)
236{
237	struct inode *inode, *upperinode;
238	struct timespec64 ctime, uctime;
239	struct timespec64 mtime, umtime;
240
241	if (file->f_flags & O_NOATIME)
242		return;
243
244	inode = file_inode(file);
245	upperinode = ovl_inode_upper(inode);
246
247	if (!upperinode)
248		return;
249
250	ctime = inode_get_ctime(inode);
251	uctime = inode_get_ctime(upperinode);
252	mtime = inode_get_mtime(inode);
253	umtime = inode_get_mtime(upperinode);
254	if ((!timespec64_equal(&mtime, &umtime)) ||
255	     !timespec64_equal(&ctime, &uctime)) {
256		inode_set_mtime_to_ts(inode, inode_get_mtime(upperinode));
257		inode_set_ctime_to_ts(inode, uctime);
258	}
259
260	touch_atime(&file->f_path);
261}
262
263static ssize_t ovl_read_iter(struct kiocb *iocb, struct iov_iter *iter)
264{
265	struct file *file = iocb->ki_filp;
266	struct fd real;
267	ssize_t ret;
268	struct backing_file_ctx ctx = {
269		.cred = ovl_creds(file_inode(file)->i_sb),
270		.user_file = file,
271		.accessed = ovl_file_accessed,
272	};
273
274	if (!iov_iter_count(iter))
275		return 0;
276
277	ret = ovl_real_fdget(file, &real);
278	if (ret)
279		return ret;
280
281	ret = backing_file_read_iter(real.file, iter, iocb, iocb->ki_flags,
282				     &ctx);
283	fdput(real);
284
285	return ret;
286}
287
288static ssize_t ovl_write_iter(struct kiocb *iocb, struct iov_iter *iter)
289{
290	struct file *file = iocb->ki_filp;
291	struct inode *inode = file_inode(file);
292	struct fd real;
293	ssize_t ret;
294	int ifl = iocb->ki_flags;
295	struct backing_file_ctx ctx = {
296		.cred = ovl_creds(inode->i_sb),
297		.user_file = file,
298		.end_write = ovl_file_modified,
299	};
300
301	if (!iov_iter_count(iter))
302		return 0;
303
304	inode_lock(inode);
305	/* Update mode */
306	ovl_copyattr(inode);
307
308	ret = ovl_real_fdget(file, &real);
309	if (ret)
310		goto out_unlock;
311
312	if (!ovl_should_sync(OVL_FS(inode->i_sb)))
313		ifl &= ~(IOCB_DSYNC | IOCB_SYNC);
314
315	/*
316	 * Overlayfs doesn't support deferred completions, don't copy
317	 * this property in case it is set by the issuer.
318	 */
319	ifl &= ~IOCB_DIO_CALLER_COMP;
320	ret = backing_file_write_iter(real.file, iter, iocb, ifl, &ctx);
321	fdput(real);
322
323out_unlock:
324	inode_unlock(inode);
325
326	return ret;
327}
328
329static ssize_t ovl_splice_read(struct file *in, loff_t *ppos,
330			       struct pipe_inode_info *pipe, size_t len,
331			       unsigned int flags)
332{
333	struct fd real;
334	ssize_t ret;
335	struct backing_file_ctx ctx = {
336		.cred = ovl_creds(file_inode(in)->i_sb),
337		.user_file = in,
338		.accessed = ovl_file_accessed,
339	};
340
341	ret = ovl_real_fdget(in, &real);
342	if (ret)
343		return ret;
344
345	ret = backing_file_splice_read(real.file, ppos, pipe, len, flags, &ctx);
346	fdput(real);
347
348	return ret;
349}
350
351/*
352 * Calling iter_file_splice_write() directly from overlay's f_op may deadlock
353 * due to lock order inversion between pipe->mutex in iter_file_splice_write()
354 * and file_start_write(real.file) in ovl_write_iter().
355 *
356 * So do everything ovl_write_iter() does and call iter_file_splice_write() on
357 * the real file.
358 */
359static ssize_t ovl_splice_write(struct pipe_inode_info *pipe, struct file *out,
360				loff_t *ppos, size_t len, unsigned int flags)
361{
362	struct fd real;
363	struct inode *inode = file_inode(out);
364	ssize_t ret;
365	struct backing_file_ctx ctx = {
366		.cred = ovl_creds(inode->i_sb),
367		.user_file = out,
368		.end_write = ovl_file_modified,
369	};
370
371	inode_lock(inode);
372	/* Update mode */
373	ovl_copyattr(inode);
374
375	ret = ovl_real_fdget(out, &real);
376	if (ret)
377		goto out_unlock;
378
379	ret = backing_file_splice_write(pipe, real.file, ppos, len, flags, &ctx);
380	fdput(real);
381
382out_unlock:
383	inode_unlock(inode);
384
385	return ret;
386}
387
388static int ovl_fsync(struct file *file, loff_t start, loff_t end, int datasync)
389{
390	struct fd real;
391	const struct cred *old_cred;
392	int ret;
393
394	ret = ovl_sync_status(OVL_FS(file_inode(file)->i_sb));
395	if (ret <= 0)
396		return ret;
397
398	ret = ovl_real_fdget_meta(file, &real, !datasync);
399	if (ret)
400		return ret;
401
402	/* Don't sync lower file for fear of receiving EROFS error */
403	if (file_inode(real.file) == ovl_inode_upper(file_inode(file))) {
404		old_cred = ovl_override_creds(file_inode(file)->i_sb);
405		ret = vfs_fsync_range(real.file, start, end, datasync);
406		revert_creds(old_cred);
407	}
408
409	fdput(real);
410
411	return ret;
412}
413
414static int ovl_mmap(struct file *file, struct vm_area_struct *vma)
415{
416	struct file *realfile = file->private_data;
417	struct backing_file_ctx ctx = {
418		.cred = ovl_creds(file_inode(file)->i_sb),
419		.user_file = file,
420		.accessed = ovl_file_accessed,
421	};
422
423	return backing_file_mmap(realfile, vma, &ctx);
424}
425
426static long ovl_fallocate(struct file *file, int mode, loff_t offset, loff_t len)
427{
428	struct inode *inode = file_inode(file);
429	struct fd real;
430	const struct cred *old_cred;
431	int ret;
432
433	inode_lock(inode);
434	/* Update mode */
435	ovl_copyattr(inode);
436	ret = file_remove_privs(file);
437	if (ret)
438		goto out_unlock;
439
440	ret = ovl_real_fdget(file, &real);
441	if (ret)
442		goto out_unlock;
443
444	old_cred = ovl_override_creds(file_inode(file)->i_sb);
445	ret = vfs_fallocate(real.file, mode, offset, len);
446	revert_creds(old_cred);
447
448	/* Update size */
449	ovl_file_modified(file);
450
451	fdput(real);
452
453out_unlock:
454	inode_unlock(inode);
455
456	return ret;
457}
458
459static int ovl_fadvise(struct file *file, loff_t offset, loff_t len, int advice)
460{
461	struct fd real;
462	const struct cred *old_cred;
463	int ret;
464
465	ret = ovl_real_fdget(file, &real);
466	if (ret)
467		return ret;
468
469	old_cred = ovl_override_creds(file_inode(file)->i_sb);
470	ret = vfs_fadvise(real.file, offset, len, advice);
471	revert_creds(old_cred);
472
473	fdput(real);
474
475	return ret;
476}
477
/* Selects which VFS range operation ovl_copyfile() performs */
enum ovl_copyop {
	OVL_COPY,	/* vfs_copy_file_range() */
	OVL_CLONE,	/* vfs_clone_file_range() */
	OVL_DEDUPE,	/* vfs_dedupe_file_range_one() */
};
483
484static loff_t ovl_copyfile(struct file *file_in, loff_t pos_in,
485			    struct file *file_out, loff_t pos_out,
486			    loff_t len, unsigned int flags, enum ovl_copyop op)
487{
488	struct inode *inode_out = file_inode(file_out);
489	struct fd real_in, real_out;
490	const struct cred *old_cred;
491	loff_t ret;
492
493	inode_lock(inode_out);
494	if (op != OVL_DEDUPE) {
495		/* Update mode */
496		ovl_copyattr(inode_out);
497		ret = file_remove_privs(file_out);
498		if (ret)
499			goto out_unlock;
500	}
501
502	ret = ovl_real_fdget(file_out, &real_out);
503	if (ret)
504		goto out_unlock;
505
506	ret = ovl_real_fdget(file_in, &real_in);
507	if (ret) {
508		fdput(real_out);
509		goto out_unlock;
510	}
511
512	old_cred = ovl_override_creds(file_inode(file_out)->i_sb);
513	switch (op) {
514	case OVL_COPY:
515		ret = vfs_copy_file_range(real_in.file, pos_in,
516					  real_out.file, pos_out, len, flags);
517		break;
518
519	case OVL_CLONE:
520		ret = vfs_clone_file_range(real_in.file, pos_in,
521					   real_out.file, pos_out, len, flags);
522		break;
523
524	case OVL_DEDUPE:
525		ret = vfs_dedupe_file_range_one(real_in.file, pos_in,
526						real_out.file, pos_out, len,
527						flags);
528		break;
529	}
530	revert_creds(old_cred);
531
532	/* Update size */
533	ovl_file_modified(file_out);
534
535	fdput(real_in);
536	fdput(real_out);
537
538out_unlock:
539	inode_unlock(inode_out);
540
541	return ret;
542}
543
544static ssize_t ovl_copy_file_range(struct file *file_in, loff_t pos_in,
545				   struct file *file_out, loff_t pos_out,
546				   size_t len, unsigned int flags)
547{
548	return ovl_copyfile(file_in, pos_in, file_out, pos_out, len, flags,
549			    OVL_COPY);
550}
551
552static loff_t ovl_remap_file_range(struct file *file_in, loff_t pos_in,
553				   struct file *file_out, loff_t pos_out,
554				   loff_t len, unsigned int remap_flags)
555{
556	enum ovl_copyop op;
557
558	if (remap_flags & ~(REMAP_FILE_DEDUP | REMAP_FILE_ADVISORY))
559		return -EINVAL;
560
561	if (remap_flags & REMAP_FILE_DEDUP)
562		op = OVL_DEDUPE;
563	else
564		op = OVL_CLONE;
565
566	/*
567	 * Don't copy up because of a dedupe request, this wouldn't make sense
568	 * most of the time (data would be duplicated instead of deduplicated).
569	 */
570	if (op == OVL_DEDUPE &&
571	    (!ovl_inode_upper(file_inode(file_in)) ||
572	     !ovl_inode_upper(file_inode(file_out))))
573		return -EPERM;
574
575	return ovl_copyfile(file_in, pos_in, file_out, pos_out, len,
576			    remap_flags, op);
577}
578
579static int ovl_flush(struct file *file, fl_owner_t id)
580{
581	struct fd real;
582	const struct cred *old_cred;
583	int err;
584
585	err = ovl_real_fdget(file, &real);
586	if (err)
587		return err;
588
589	if (real.file->f_op->flush) {
590		old_cred = ovl_override_creds(file_inode(file)->i_sb);
591		err = real.file->f_op->flush(real.file, id);
592		revert_creds(old_cred);
593	}
594	fdput(real);
595
596	return err;
597}
598
599const struct file_operations ovl_file_operations = {
600	.open		= ovl_open,
601	.release	= ovl_release,
602	.llseek		= ovl_llseek,
603	.read_iter	= ovl_read_iter,
604	.write_iter	= ovl_write_iter,
605	.fsync		= ovl_fsync,
606	.mmap		= ovl_mmap,
607	.fallocate	= ovl_fallocate,
608	.fadvise	= ovl_fadvise,
609	.flush		= ovl_flush,
610	.splice_read    = ovl_splice_read,
611	.splice_write   = ovl_splice_write,
612
613	.copy_file_range	= ovl_copy_file_range,
614	.remap_file_range	= ovl_remap_file_range,
615};
616