1// SPDX-License-Identifier: GPL-2.0-or-later
2/*
3 * mmap.c
4 *
5 * Code to deal with the mess that is clustered mmap.
6 *
7 * Copyright (C) 2002, 2004 Oracle.  All rights reserved.
8 */
9
10#include <linux/fs.h>
11#include <linux/types.h>
12#include <linux/highmem.h>
13#include <linux/pagemap.h>
14#include <linux/uio.h>
15#include <linux/signal.h>
16#include <linux/rbtree.h>
17
18#include <cluster/masklog.h>
19
20#include "ocfs2.h"
21
22#include "aops.h"
23#include "dlmglue.h"
24#include "file.h"
25#include "inode.h"
26#include "mmap.h"
27#include "super.h"
28#include "ocfs2_trace.h"
29
30
31static vm_fault_t ocfs2_fault(struct vm_fault *vmf)
32{
33	struct vm_area_struct *vma = vmf->vma;
34	sigset_t oldset;
35	vm_fault_t ret;
36
37	ocfs2_block_signals(&oldset);
38	ret = filemap_fault(vmf);
39	ocfs2_unblock_signals(&oldset);
40
41	trace_ocfs2_fault(OCFS2_I(vma->vm_file->f_mapping->host)->ip_blkno,
42			  vma, vmf->page, vmf->pgoff);
43	return ret;
44}
45
46static vm_fault_t __ocfs2_page_mkwrite(struct file *file,
47			struct buffer_head *di_bh, struct page *page)
48{
49	int err;
50	vm_fault_t ret = VM_FAULT_NOPAGE;
51	struct inode *inode = file_inode(file);
52	struct address_space *mapping = inode->i_mapping;
53	loff_t pos = page_offset(page);
54	unsigned int len = PAGE_SIZE;
55	pgoff_t last_index;
56	struct page *locked_page = NULL;
57	void *fsdata;
58	loff_t size = i_size_read(inode);
59
60	last_index = (size - 1) >> PAGE_SHIFT;
61
62	/*
63	 * There are cases that lead to the page no longer belonging to the
64	 * mapping.
65	 * 1) pagecache truncates locally due to memory pressure.
66	 * 2) pagecache truncates when another is taking EX lock against
67	 * inode lock. see ocfs2_data_convert_worker.
68	 *
69	 * The i_size check doesn't catch the case where nodes truncated and
70	 * then re-extended the file. We'll re-check the page mapping after
71	 * taking the page lock inside of ocfs2_write_begin_nolock().
72	 *
73	 * Let VM retry with these cases.
74	 */
75	if ((page->mapping != inode->i_mapping) ||
76	    (!PageUptodate(page)) ||
77	    (page_offset(page) >= size))
78		goto out;
79
80	/*
81	 * Call ocfs2_write_begin() and ocfs2_write_end() to take
82	 * advantage of the allocation code there. We pass a write
83	 * length of the whole page (chopped to i_size) to make sure
84	 * the whole thing is allocated.
85	 *
86	 * Since we know the page is up to date, we don't have to
87	 * worry about ocfs2_write_begin() skipping some buffer reads
88	 * because the "write" would invalidate their data.
89	 */
90	if (page->index == last_index)
91		len = ((size - 1) & ~PAGE_MASK) + 1;
92
93	err = ocfs2_write_begin_nolock(mapping, pos, len, OCFS2_WRITE_MMAP,
94				       &locked_page, &fsdata, di_bh, page);
95	if (err) {
96		if (err != -ENOSPC)
97			mlog_errno(err);
98		ret = vmf_error(err);
99		goto out;
100	}
101
102	if (!locked_page) {
103		ret = VM_FAULT_NOPAGE;
104		goto out;
105	}
106	err = ocfs2_write_end_nolock(mapping, pos, len, len, fsdata);
107	BUG_ON(err != len);
108	ret = VM_FAULT_LOCKED;
109out:
110	return ret;
111}
112
113static vm_fault_t ocfs2_page_mkwrite(struct vm_fault *vmf)
114{
115	struct page *page = vmf->page;
116	struct inode *inode = file_inode(vmf->vma->vm_file);
117	struct buffer_head *di_bh = NULL;
118	sigset_t oldset;
119	int err;
120	vm_fault_t ret;
121
122	sb_start_pagefault(inode->i_sb);
123	ocfs2_block_signals(&oldset);
124
125	/*
126	 * The cluster locks taken will block a truncate from another
127	 * node. Taking the data lock will also ensure that we don't
128	 * attempt page truncation as part of a downconvert.
129	 */
130	err = ocfs2_inode_lock(inode, &di_bh, 1);
131	if (err < 0) {
132		mlog_errno(err);
133		ret = vmf_error(err);
134		goto out;
135	}
136
137	/*
138	 * The alloc sem should be enough to serialize with
139	 * ocfs2_truncate_file() changing i_size as well as any thread
140	 * modifying the inode btree.
141	 */
142	down_write(&OCFS2_I(inode)->ip_alloc_sem);
143
144	ret = __ocfs2_page_mkwrite(vmf->vma->vm_file, di_bh, page);
145
146	up_write(&OCFS2_I(inode)->ip_alloc_sem);
147
148	brelse(di_bh);
149	ocfs2_inode_unlock(inode, 1);
150
151out:
152	ocfs2_unblock_signals(&oldset);
153	sb_end_pagefault(inode->i_sb);
154	return ret;
155}
156
157static const struct vm_operations_struct ocfs2_file_vm_ops = {
158	.fault		= ocfs2_fault,
159	.page_mkwrite	= ocfs2_page_mkwrite,
160};
161
162int ocfs2_mmap(struct file *file, struct vm_area_struct *vma)
163{
164	int ret = 0, lock_level = 0;
165
166	ret = ocfs2_inode_lock_atime(file_inode(file),
167				    file->f_path.mnt, &lock_level, 1);
168	if (ret < 0) {
169		mlog_errno(ret);
170		goto out;
171	}
172	ocfs2_inode_unlock(file_inode(file), lock_level);
173out:
174	vma->vm_ops = &ocfs2_file_vm_ops;
175	return 0;
176}
177
178