1// SPDX-License-Identifier: GPL-2.0
2/*
3 *  linux/fs/ext2/file.c
4 *
5 * Copyright (C) 1992, 1993, 1994, 1995
6 * Remy Card (card@masi.ibp.fr)
7 * Laboratoire MASI - Institut Blaise Pascal
8 * Universite Pierre et Marie Curie (Paris VI)
9 *
10 *  from
11 *
12 *  linux/fs/minix/file.c
13 *
14 *  Copyright (C) 1991, 1992  Linus Torvalds
15 *
16 *  ext2 fs regular file handling primitives
17 *
18 *  64-bit file support on 64-bit platforms by Jakub Jelinek
19 * 	(jj@sunsite.ms.mff.cuni.cz)
20 */
21
22#include <linux/time.h>
23#include <linux/pagemap.h>
24#include <linux/dax.h>
25#include <linux/quotaops.h>
26#include <linux/iomap.h>
27#include <linux/uio.h>
28#include <linux/buffer_head.h>
29#include "ext2.h"
30#include "xattr.h"
31#include "acl.h"
32#include "trace.h"
33
34#ifdef CONFIG_FS_DAX
35static ssize_t ext2_dax_read_iter(struct kiocb *iocb, struct iov_iter *to)
36{
37	struct inode *inode = iocb->ki_filp->f_mapping->host;
38	ssize_t ret;
39
40	if (!iov_iter_count(to))
41		return 0; /* skip atime */
42
43	inode_lock_shared(inode);
44	ret = dax_iomap_rw(iocb, to, &ext2_iomap_ops);
45	inode_unlock_shared(inode);
46
47	file_accessed(iocb->ki_filp);
48	return ret;
49}
50
51static ssize_t ext2_dax_write_iter(struct kiocb *iocb, struct iov_iter *from)
52{
53	struct file *file = iocb->ki_filp;
54	struct inode *inode = file->f_mapping->host;
55	ssize_t ret;
56
57	inode_lock(inode);
58	ret = generic_write_checks(iocb, from);
59	if (ret <= 0)
60		goto out_unlock;
61	ret = file_remove_privs(file);
62	if (ret)
63		goto out_unlock;
64	ret = file_update_time(file);
65	if (ret)
66		goto out_unlock;
67
68	ret = dax_iomap_rw(iocb, from, &ext2_iomap_ops);
69	if (ret > 0 && iocb->ki_pos > i_size_read(inode)) {
70		i_size_write(inode, iocb->ki_pos);
71		mark_inode_dirty(inode);
72	}
73
74out_unlock:
75	inode_unlock(inode);
76	if (ret > 0)
77		ret = generic_write_sync(iocb, ret);
78	return ret;
79}
80
81/*
82 * The lock ordering for ext2 DAX fault paths is:
83 *
84 * mmap_lock (MM)
85 *   sb_start_pagefault (vfs, freeze)
86 *     address_space->invalidate_lock
87 *       address_space->i_mmap_rwsem or page_lock (mutually exclusive in DAX)
88 *         ext2_inode_info->truncate_mutex
89 *
90 * The default page_lock and i_size verification done by non-DAX fault paths
91 * is sufficient because ext2 doesn't support hole punching.
92 */
93static vm_fault_t ext2_dax_fault(struct vm_fault *vmf)
94{
95	struct inode *inode = file_inode(vmf->vma->vm_file);
96	vm_fault_t ret;
97	bool write = (vmf->flags & FAULT_FLAG_WRITE) &&
98		(vmf->vma->vm_flags & VM_SHARED);
99
100	if (write) {
101		sb_start_pagefault(inode->i_sb);
102		file_update_time(vmf->vma->vm_file);
103	}
104	filemap_invalidate_lock_shared(inode->i_mapping);
105
106	ret = dax_iomap_fault(vmf, 0, NULL, NULL, &ext2_iomap_ops);
107
108	filemap_invalidate_unlock_shared(inode->i_mapping);
109	if (write)
110		sb_end_pagefault(inode->i_sb);
111	return ret;
112}
113
114static const struct vm_operations_struct ext2_dax_vm_ops = {
115	.fault		= ext2_dax_fault,
116	/*
117	 * .huge_fault is not supported for DAX because allocation in ext2
118	 * cannot be reliably aligned to huge page sizes and so pmd faults
119	 * will always fail and fail back to regular faults.
120	 */
121	.page_mkwrite	= ext2_dax_fault,
122	.pfn_mkwrite	= ext2_dax_fault,
123};
124
125static int ext2_file_mmap(struct file *file, struct vm_area_struct *vma)
126{
127	if (!IS_DAX(file_inode(file)))
128		return generic_file_mmap(file, vma);
129
130	file_accessed(file);
131	vma->vm_ops = &ext2_dax_vm_ops;
132	return 0;
133}
134#else
135#define ext2_file_mmap	generic_file_mmap
136#endif
137
138/*
139 * Called when filp is released. This happens when all file descriptors
140 * for a single struct file are closed. Note that different open() calls
141 * for the same file yield different struct file structures.
142 */
143static int ext2_release_file (struct inode * inode, struct file * filp)
144{
145	if (filp->f_mode & FMODE_WRITE) {
146		mutex_lock(&EXT2_I(inode)->truncate_mutex);
147		ext2_discard_reservation(inode);
148		mutex_unlock(&EXT2_I(inode)->truncate_mutex);
149	}
150	return 0;
151}
152
153int ext2_fsync(struct file *file, loff_t start, loff_t end, int datasync)
154{
155	int ret;
156	struct super_block *sb = file->f_mapping->host->i_sb;
157
158	ret = generic_buffers_fsync(file, start, end, datasync);
159	if (ret == -EIO)
160		/* We don't really know where the IO error happened... */
161		ext2_error(sb, __func__,
162			   "detected IO error when writing metadata buffers");
163	return ret;
164}
165
166static ssize_t ext2_dio_read_iter(struct kiocb *iocb, struct iov_iter *to)
167{
168	struct file *file = iocb->ki_filp;
169	struct inode *inode = file->f_mapping->host;
170	ssize_t ret;
171
172	trace_ext2_dio_read_begin(iocb, to, 0);
173	inode_lock_shared(inode);
174	ret = iomap_dio_rw(iocb, to, &ext2_iomap_ops, NULL, 0, NULL, 0);
175	inode_unlock_shared(inode);
176	trace_ext2_dio_read_end(iocb, to, ret);
177
178	return ret;
179}
180
181static int ext2_dio_write_end_io(struct kiocb *iocb, ssize_t size,
182				 int error, unsigned int flags)
183{
184	loff_t pos = iocb->ki_pos;
185	struct inode *inode = file_inode(iocb->ki_filp);
186
187	if (error)
188		goto out;
189
190	/*
191	 * If we are extending the file, we have to update i_size here before
192	 * page cache gets invalidated in iomap_dio_rw(). This prevents racing
193	 * buffered reads from zeroing out too much from page cache pages.
194	 * Note that all extending writes always happens synchronously with
195	 * inode lock held by ext2_dio_write_iter(). So it is safe to update
196	 * inode size here for extending file writes.
197	 */
198	pos += size;
199	if (pos > i_size_read(inode)) {
200		i_size_write(inode, pos);
201		mark_inode_dirty(inode);
202	}
203out:
204	trace_ext2_dio_write_endio(iocb, size, error);
205	return error;
206}
207
/*
 * iomap direct I/O callbacks passed to iomap_dio_rw() by
 * ext2_dio_write_iter(); only the write completion hook is provided.
 */
static const struct iomap_dio_ops ext2_dio_write_ops = {
	.end_io = ext2_dio_write_end_io,
};
211
212static ssize_t ext2_dio_write_iter(struct kiocb *iocb, struct iov_iter *from)
213{
214	struct file *file = iocb->ki_filp;
215	struct inode *inode = file->f_mapping->host;
216	ssize_t ret;
217	unsigned int flags = 0;
218	unsigned long blocksize = inode->i_sb->s_blocksize;
219	loff_t offset = iocb->ki_pos;
220	loff_t count = iov_iter_count(from);
221	ssize_t status = 0;
222
223	trace_ext2_dio_write_begin(iocb, from, 0);
224	inode_lock(inode);
225	ret = generic_write_checks(iocb, from);
226	if (ret <= 0)
227		goto out_unlock;
228
229	ret = kiocb_modified(iocb);
230	if (ret)
231		goto out_unlock;
232
233	/* use IOMAP_DIO_FORCE_WAIT for unaligned or extending writes */
234	if (iocb->ki_pos + iov_iter_count(from) > i_size_read(inode) ||
235	   (!IS_ALIGNED(iocb->ki_pos | iov_iter_alignment(from), blocksize)))
236		flags |= IOMAP_DIO_FORCE_WAIT;
237
238	ret = iomap_dio_rw(iocb, from, &ext2_iomap_ops, &ext2_dio_write_ops,
239			   flags, NULL, 0);
240
241	/* ENOTBLK is magic return value for fallback to buffered-io */
242	if (ret == -ENOTBLK)
243		ret = 0;
244
245	if (ret < 0 && ret != -EIOCBQUEUED)
246		ext2_write_failed(inode->i_mapping, offset + count);
247
248	/* handle case for partial write and for fallback to buffered write */
249	if (ret >= 0 && iov_iter_count(from)) {
250		loff_t pos, endbyte;
251		int ret2;
252
253		iocb->ki_flags &= ~IOCB_DIRECT;
254		pos = iocb->ki_pos;
255		status = generic_perform_write(iocb, from);
256		if (unlikely(status < 0)) {
257			ret = status;
258			goto out_unlock;
259		}
260
261		ret += status;
262		endbyte = pos + status - 1;
263		ret2 = filemap_write_and_wait_range(inode->i_mapping, pos,
264						    endbyte);
265		if (!ret2)
266			invalidate_mapping_pages(inode->i_mapping,
267						 pos >> PAGE_SHIFT,
268						 endbyte >> PAGE_SHIFT);
269		if (ret > 0)
270			generic_write_sync(iocb, ret);
271	}
272
273out_unlock:
274	inode_unlock(inode);
275	if (status)
276		trace_ext2_dio_write_buff_end(iocb, from, status);
277	trace_ext2_dio_write_end(iocb, from, ret);
278	return ret;
279}
280
281static ssize_t ext2_file_read_iter(struct kiocb *iocb, struct iov_iter *to)
282{
283#ifdef CONFIG_FS_DAX
284	if (IS_DAX(iocb->ki_filp->f_mapping->host))
285		return ext2_dax_read_iter(iocb, to);
286#endif
287	if (iocb->ki_flags & IOCB_DIRECT)
288		return ext2_dio_read_iter(iocb, to);
289
290	return generic_file_read_iter(iocb, to);
291}
292
293static ssize_t ext2_file_write_iter(struct kiocb *iocb, struct iov_iter *from)
294{
295#ifdef CONFIG_FS_DAX
296	if (IS_DAX(iocb->ki_filp->f_mapping->host))
297		return ext2_dax_write_iter(iocb, from);
298#endif
299	if (iocb->ki_flags & IOCB_DIRECT)
300		return ext2_dio_write_iter(iocb, from);
301
302	return generic_file_write_iter(iocb, from);
303}
304
305const struct file_operations ext2_file_operations = {
306	.llseek		= generic_file_llseek,
307	.read_iter	= ext2_file_read_iter,
308	.write_iter	= ext2_file_write_iter,
309	.unlocked_ioctl = ext2_ioctl,
310#ifdef CONFIG_COMPAT
311	.compat_ioctl	= ext2_compat_ioctl,
312#endif
313	.mmap		= ext2_file_mmap,
314	.open		= dquot_file_open,
315	.release	= ext2_release_file,
316	.fsync		= ext2_fsync,
317	.get_unmapped_area = thp_get_unmapped_area,
318	.splice_read	= filemap_splice_read,
319	.splice_write	= iter_file_splice_write,
320};
321
322const struct inode_operations ext2_file_inode_operations = {
323	.listxattr	= ext2_listxattr,
324	.getattr	= ext2_getattr,
325	.setattr	= ext2_setattr,
326	.get_inode_acl	= ext2_get_acl,
327	.set_acl	= ext2_set_acl,
328	.fiemap		= ext2_fiemap,
329	.fileattr_get	= ext2_fileattr_get,
330	.fileattr_set	= ext2_fileattr_set,
331};
332