1// SPDX-License-Identifier: GPL-2.0-or-later
2/*
3 * Copyright (C) 2018-2023 Oracle.  All Rights Reserved.
4 * Author: Darrick J. Wong <djwong@kernel.org>
5 */
6#include "xfs.h"
7#include "xfs_fs.h"
8#include "xfs_shared.h"
9#include "xfs_format.h"
10#include "xfs_log_format.h"
11#include "xfs_trans_resv.h"
12#include "xfs_mount.h"
13#include "scrub/xfile.h"
14#include "scrub/xfarray.h"
15#include "scrub/scrub.h"
16#include "scrub/trace.h"
17#include <linux/shmem_fs.h>
18
19/*
20 * Swappable Temporary Memory
21 * ==========================
22 *
23 * Online checking sometimes needs to be able to stage a large amount of data
24 * in memory.  This information might not fit in the available memory and it
25 * doesn't all need to be accessible at all times.  In other words, we want an
26 * indexed data buffer to store data that can be paged out.
27 *
28 * When CONFIG_TMPFS=y, shmemfs is enough of a filesystem to meet those
29 * requirements.  Therefore, the xfile mechanism uses an unlinked shmem file to
30 * store our staging data.  This file is not installed in the file descriptor
31 * table so that user programs cannot access the data, which means that the
32 * xfile must be freed with xfile_destroy.
33 *
34 * xfiles assume that the caller will handle all required concurrency
35 * management; standard vfs locks (freezer and inode) are not taken.  Reads
36 * and writes are satisfied directly from the page cache.
37 */
38
/*
 * xfiles must not be exposed to userspace and require upper layers to
 * coordinate access to the one handle returned by the constructor, so
 * establish a separate lock class for xfiles to avoid confusing lockdep.
 *
 * xfile_create assigns this class to the i_rwsem of the backing inode, and
 * xfile_destroy switches that lock back to the filesystem type's own class
 * before dropping the file reference.
 */
static struct lock_class_key xfile_i_mutex_key;
45
46/*
47 * Create an xfile of the given size.  The description will be used in the
48 * trace output.
49 */
50int
51xfile_create(
52	const char		*description,
53	loff_t			isize,
54	struct xfile		**xfilep)
55{
56	struct inode		*inode;
57	struct xfile		*xf;
58	int			error;
59
60	xf = kmalloc(sizeof(struct xfile), XCHK_GFP_FLAGS);
61	if (!xf)
62		return -ENOMEM;
63
64	xf->file = shmem_kernel_file_setup(description, isize, VM_NORESERVE);
65	if (IS_ERR(xf->file)) {
66		error = PTR_ERR(xf->file);
67		goto out_xfile;
68	}
69
70	inode = file_inode(xf->file);
71	lockdep_set_class(&inode->i_rwsem, &xfile_i_mutex_key);
72
73	/*
74	 * We don't want to bother with kmapping data during repair, so don't
75	 * allow highmem pages to back this mapping.
76	 */
77	mapping_set_gfp_mask(inode->i_mapping, GFP_KERNEL);
78
79	trace_xfile_create(xf);
80
81	*xfilep = xf;
82	return 0;
83out_xfile:
84	kfree(xf);
85	return error;
86}
87
88/* Close the file and release all resources. */
89void
90xfile_destroy(
91	struct xfile		*xf)
92{
93	struct inode		*inode = file_inode(xf->file);
94
95	trace_xfile_destroy(xf);
96
97	lockdep_set_class(&inode->i_rwsem, &inode->i_sb->s_type->i_mutex_key);
98	fput(xf->file);
99	kfree(xf);
100}
101
102/*
103 * Load an object.  Since we're treating this file as "memory", any error or
104 * short IO is treated as a failure to allocate memory.
105 */
106int
107xfile_load(
108	struct xfile		*xf,
109	void			*buf,
110	size_t			count,
111	loff_t			pos)
112{
113	struct inode		*inode = file_inode(xf->file);
114	unsigned int		pflags;
115
116	if (count > MAX_RW_COUNT)
117		return -ENOMEM;
118	if (inode->i_sb->s_maxbytes - pos < count)
119		return -ENOMEM;
120
121	trace_xfile_load(xf, pos, count);
122
123	pflags = memalloc_nofs_save();
124	while (count > 0) {
125		struct folio	*folio;
126		unsigned int	len;
127		unsigned int	offset;
128
129		if (shmem_get_folio(inode, pos >> PAGE_SHIFT, &folio,
130				SGP_READ) < 0)
131			break;
132		if (!folio) {
133			/*
134			 * No data stored at this offset, just zero the output
135			 * buffer until the next page boundary.
136			 */
137			len = min_t(ssize_t, count,
138				PAGE_SIZE - offset_in_page(pos));
139			memset(buf, 0, len);
140		} else {
141			if (filemap_check_wb_err(inode->i_mapping, 0)) {
142				folio_unlock(folio);
143				folio_put(folio);
144				break;
145			}
146
147			offset = offset_in_folio(folio, pos);
148			len = min_t(ssize_t, count, folio_size(folio) - offset);
149			memcpy(buf, folio_address(folio) + offset, len);
150
151			folio_unlock(folio);
152			folio_put(folio);
153		}
154		count -= len;
155		pos += len;
156		buf += len;
157	}
158	memalloc_nofs_restore(pflags);
159
160	if (count)
161		return -ENOMEM;
162	return 0;
163}
164
165/*
166 * Store an object.  Since we're treating this file as "memory", any error or
167 * short IO is treated as a failure to allocate memory.
168 */
169int
170xfile_store(
171	struct xfile		*xf,
172	const void		*buf,
173	size_t			count,
174	loff_t			pos)
175{
176	struct inode		*inode = file_inode(xf->file);
177	unsigned int		pflags;
178
179	if (count > MAX_RW_COUNT)
180		return -ENOMEM;
181	if (inode->i_sb->s_maxbytes - pos < count)
182		return -ENOMEM;
183
184	trace_xfile_store(xf, pos, count);
185
186	/*
187	 * Increase the file size first so that shmem_get_folio(..., SGP_CACHE),
188	 * actually allocates a folio instead of erroring out.
189	 */
190	if (pos + count > i_size_read(inode))
191		i_size_write(inode, pos + count);
192
193	pflags = memalloc_nofs_save();
194	while (count > 0) {
195		struct folio	*folio;
196		unsigned int	len;
197		unsigned int	offset;
198
199		if (shmem_get_folio(inode, pos >> PAGE_SHIFT, &folio,
200				SGP_CACHE) < 0)
201			break;
202		if (filemap_check_wb_err(inode->i_mapping, 0)) {
203			folio_unlock(folio);
204			folio_put(folio);
205			break;
206		}
207
208		offset = offset_in_folio(folio, pos);
209		len = min_t(ssize_t, count, folio_size(folio) - offset);
210		memcpy(folio_address(folio) + offset, buf, len);
211
212		folio_mark_dirty(folio);
213		folio_unlock(folio);
214		folio_put(folio);
215
216		count -= len;
217		pos += len;
218		buf += len;
219	}
220	memalloc_nofs_restore(pflags);
221
222	if (count)
223		return -ENOMEM;
224	return 0;
225}
226
227/* Find the next written area in the xfile data for a given offset. */
228loff_t
229xfile_seek_data(
230	struct xfile		*xf,
231	loff_t			pos)
232{
233	loff_t			ret;
234
235	ret = vfs_llseek(xf->file, pos, SEEK_DATA);
236	trace_xfile_seek_data(xf, pos, ret);
237	return ret;
238}
239
240/*
241 * Grab the (locked) folio for a memory object.  The object cannot span a folio
242 * boundary.  Returns the locked folio if successful, NULL if there was no
243 * folio or it didn't cover the range requested, or an ERR_PTR on failure.
244 */
245struct folio *
246xfile_get_folio(
247	struct xfile		*xf,
248	loff_t			pos,
249	size_t			len,
250	unsigned int		flags)
251{
252	struct inode		*inode = file_inode(xf->file);
253	struct folio		*folio = NULL;
254	unsigned int		pflags;
255	int			error;
256
257	if (inode->i_sb->s_maxbytes - pos < len)
258		return ERR_PTR(-ENOMEM);
259
260	trace_xfile_get_folio(xf, pos, len);
261
262	/*
263	 * Increase the file size first so that shmem_get_folio(..., SGP_CACHE),
264	 * actually allocates a folio instead of erroring out.
265	 */
266	if ((flags & XFILE_ALLOC) && pos + len > i_size_read(inode))
267		i_size_write(inode, pos + len);
268
269	pflags = memalloc_nofs_save();
270	error = shmem_get_folio(inode, pos >> PAGE_SHIFT, &folio,
271			(flags & XFILE_ALLOC) ? SGP_CACHE : SGP_READ);
272	memalloc_nofs_restore(pflags);
273	if (error)
274		return ERR_PTR(error);
275
276	if (!folio)
277		return NULL;
278
279	if (len > folio_size(folio) - offset_in_folio(folio, pos)) {
280		folio_unlock(folio);
281		folio_put(folio);
282		return NULL;
283	}
284
285	if (filemap_check_wb_err(inode->i_mapping, 0)) {
286		folio_unlock(folio);
287		folio_put(folio);
288		return ERR_PTR(-EIO);
289	}
290
291	/*
292	 * Mark the folio dirty so that it won't be reclaimed once we drop the
293	 * (potentially last) reference in xfile_put_folio.
294	 */
295	if (flags & XFILE_ALLOC)
296		folio_set_dirty(folio);
297	return folio;
298}
299
300/*
301 * Release the (locked) folio for a memory object.
302 */
303void
304xfile_put_folio(
305	struct xfile		*xf,
306	struct folio		*folio)
307{
308	trace_xfile_put_folio(xf, folio_pos(folio), folio_size(folio));
309
310	folio_unlock(folio);
311	folio_put(folio);
312}
313