// SPDX-License-Identifier: GPL-2.0-or-later
/*
 * Copyright (c) 2023-2024 Oracle.  All Rights Reserved.
 * Author: Darrick J. Wong <djwong@kernel.org>
 */
#include "xfs.h"
#include "xfs_fs.h"
#include "xfs_buf.h"
#include "xfs_buf_mem.h"
#include "xfs_trace.h"
#include <linux/shmem_fs.h>
#include "xfs_log_format.h"
#include "xfs_trans.h"
#include "xfs_buf_item.h"
#include "xfs_error.h"

/*
 * Buffer Cache for In-Memory Files
 * ================================
 *
 * Online fsck wants to create ephemeral ordered recordsets.  The existing
 * btree infrastructure can do this, but we need the buffer cache to target
 * memory instead of block devices.
 *
 * When CONFIG_TMPFS=y, shmemfs is enough of a filesystem to meet those
 * requirements.  Therefore, the xmbuf mechanism uses an unlinked shmem file to
 * store our staging data.  This file is not installed in the file descriptor
 * table so that user programs cannot access the data, which means that the
 * xmbuf must be freed with xmbuf_free.
 *
 * xmbufs assume that the caller will handle all required concurrency
 * management; standard vfs locks (freezer and inode) are not taken.  Reads
 * and writes are satisfied directly from the page cache.
 *
 * The only supported block size is PAGE_SIZE, and we cannot use highmem.
 */
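
/*
 * Minimal usage sketch (hypothetical; xmbuf_usage_example is not part of the
 * xmbuf API and exists only to illustrate the lifecycle): create an in-memory
 * buffer target, pin one block through the regular buffer cache, then tear
 * everything down.  Concurrency control is the caller's responsibility, per
 * the rules above.
 */
static int __maybe_unused
xmbuf_usage_example(
	struct xfs_mount	*mp)
{
	struct xfs_buftarg	*btp;
	struct xfs_buf		*bp;
	int			error;

	error = xmbuf_alloc(mp, "xmbuf usage example", &btp);
	if (error)
		return error;

	/* Grab the block at daddr 0; xmbuf blocks are always PAGE_SIZE. */
	error = xfs_buf_get(btp, 0, BTOBB(XMBUF_BLOCKSIZE), &bp);
	if (error)
		goto out_free;

	/* Blocks are directly mapped, so access them through b_addr. */
	memset(bp->b_addr, 0, XMBUF_BLOCKSIZE);
	xfs_buf_relse(bp);

out_free:
	xmbuf_free(btp);
	return error;
}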

/*
 * shmem files used to back an in-memory buffer cache must not be exposed to
 * userspace.  Upper layers must coordinate access to the one handle returned
 * by the constructor, so establish a separate lock class for xmbufs to avoid
 * confusing lockdep.
 */
static struct lock_class_key xmbuf_i_mutex_key;

/*
 * Allocate and initialize a buffer cache target for a memory-backed file.
 */
int
xmbuf_alloc(
	struct xfs_mount	*mp,
	const char		*descr,
	struct xfs_buftarg	**btpp)
{
	struct file		*file;
	struct inode		*inode;
	struct xfs_buftarg	*btp;
	int			error;

	btp = kzalloc(struct_size(btp, bt_cache, 1), GFP_KERNEL);
	if (!btp)
		return -ENOMEM;

	file = shmem_kernel_file_setup(descr, 0, 0);
	if (IS_ERR(file)) {
		error = PTR_ERR(file);
		goto out_free_btp;
	}
	inode = file_inode(file);

	/* private file, private locking */
	lockdep_set_class(&inode->i_rwsem, &xmbuf_i_mutex_key);

	/*
	 * We don't want to bother with kmapping data during repair, so don't
	 * allow highmem pages to back this mapping.
	 */
	mapping_set_gfp_mask(inode->i_mapping, GFP_KERNEL);

	/* ensure all writes are below EOF to avoid pagecache zeroing */
	i_size_write(inode, inode->i_sb->s_maxbytes);

	error = xfs_buf_cache_init(btp->bt_cache);
	if (error)
		goto out_file;

	/* Initialize buffer target */
	btp->bt_mount = mp;
	btp->bt_dev = (dev_t)-1U;
	btp->bt_bdev = NULL; /* in-memory buftargs have no bdev */
	btp->bt_file = file;
	btp->bt_meta_sectorsize = XMBUF_BLOCKSIZE;
	btp->bt_meta_sectormask = XMBUF_BLOCKSIZE - 1;

	error = xfs_init_buftarg(btp, XMBUF_BLOCKSIZE, descr);
	if (error)
		goto out_bcache;

	trace_xmbuf_create(btp);

	*btpp = btp;
	return 0;

out_bcache:
	xfs_buf_cache_destroy(btp->bt_cache);
out_file:
	fput(file);
out_free_btp:
	kfree(btp);
	return error;
}

/* Free a buffer cache target for a memory-backed file. */
void
xmbuf_free(
	struct xfs_buftarg	*btp)
{
	ASSERT(xfs_buftarg_is_mem(btp));
	ASSERT(percpu_counter_sum(&btp->bt_io_count) == 0);

	trace_xmbuf_free(btp);

	xfs_destroy_buftarg(btp);
	xfs_buf_cache_destroy(btp->bt_cache);
	fput(btp->bt_file);
	kfree(btp);
}

/* Directly map a shmem page into the buffer cache. */
int
xmbuf_map_page(
	struct xfs_buf		*bp)
{
	struct inode		*inode = file_inode(bp->b_target->bt_file);
	struct folio		*folio = NULL;
	struct page		*page;
	loff_t			pos = BBTOB(xfs_buf_daddr(bp));
	int			error;

	ASSERT(xfs_buftarg_is_mem(bp->b_target));

	if (bp->b_map_count != 1)
		return -ENOMEM;
	if (BBTOB(bp->b_length) != XMBUF_BLOCKSIZE)
		return -ENOMEM;
	if (offset_in_page(pos) != 0) {
		ASSERT(offset_in_page(pos) == 0);
		return -ENOMEM;
	}

	error = shmem_get_folio(inode, pos >> PAGE_SHIFT, &folio, SGP_CACHE);
	if (error)
		return error;

	if (filemap_check_wb_err(inode->i_mapping, 0)) {
		folio_unlock(folio);
		folio_put(folio);
		return -EIO;
	}

	page = folio_file_page(folio, pos >> PAGE_SHIFT);

	/*
	 * Mark the page dirty so that it won't be reclaimed once we drop the
	 * (potentially last) reference in xmbuf_unmap_page.
	 */
	set_page_dirty(page);
	unlock_page(page);

	bp->b_addr = page_address(page);
	bp->b_pages = bp->b_page_array;
	bp->b_pages[0] = page;
	bp->b_page_count = 1;
	return 0;
}
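
/*
 * A worked example of the mapping math above, assuming 4k pages: a buffer at
 * daddr 8 computes pos = BBTOB(8) = 4096, since each daddr covers a 512-byte
 * basic block.  That is page index 1 (pos >> PAGE_SHIFT) with
 * offset_in_page(pos) == 0, so the buffer maps exactly one page-aligned
 * shmem page.
 */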

/* Unmap a shmem page that was mapped into the buffer cache. */
void
xmbuf_unmap_page(
	struct xfs_buf		*bp)
{
	struct page		*page = bp->b_pages[0];

	ASSERT(xfs_buftarg_is_mem(bp->b_target));

	put_page(page);

	bp->b_addr = NULL;
	bp->b_pages[0] = NULL;
	bp->b_pages = NULL;
	bp->b_page_count = 0;
}

/* Is this a valid daddr within the buftarg? */
bool
xmbuf_verify_daddr(
	struct xfs_buftarg	*btp,
	xfs_daddr_t		daddr)
{
	struct inode		*inode = file_inode(btp->bt_file);

	ASSERT(xfs_buftarg_is_mem(btp));

	return daddr < (inode->i_sb->s_maxbytes >> BBSHIFT);
}
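
/*
 * Worked example of the bound above: shmem sets s_maxbytes to
 * MAX_LFS_FILESIZE (LLONG_MAX on 64-bit builds), so every daddr below
 * LLONG_MAX >> BBSHIFT basic blocks is accepted, and anything at or past
 * the file size limit is rejected.
 */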

/* Discard the page backing this buffer. */
static void
xmbuf_stale(
	struct xfs_buf		*bp)
{
	struct inode		*inode = file_inode(bp->b_target->bt_file);
	loff_t			pos;

	ASSERT(xfs_buftarg_is_mem(bp->b_target));

	pos = BBTOB(xfs_buf_daddr(bp));
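	/*
	 * xmbuf blocks are exactly one page (XMBUF_BLOCKSIZE == PAGE_SIZE)
	 * and page-aligned, so this punches out the whole page backing the
	 * buffer rather than zeroing part of one.
	 */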
	shmem_truncate_range(inode, pos, pos + BBTOB(bp->b_length) - 1);
}

/*
 * Finalize a buffer -- discard the backing page if it's stale, or run the
 * write verifier to detect problems.
 */
int
xmbuf_finalize(
	struct xfs_buf		*bp)
{
	xfs_failaddr_t		fa;
	int			error = 0;

	if (bp->b_flags & XBF_STALE) {
		xmbuf_stale(bp);
		return 0;
	}

	/*
	 * Although this btree is ephemeral, validate the buffer structure so
	 * that we can detect memory corruption errors and software bugs.
	 */
	fa = bp->b_ops->verify_struct(bp);
	if (fa) {
		error = -EFSCORRUPTED;
		xfs_verifier_error(bp, error, fa);
	}

	return error;
}

/*
 * Detach this xmbuf buffer from the transaction by any means necessary.
 * All buffers are direct-mapped, so they do not need bwrite.
 */
void
xmbuf_trans_bdetach(
	struct xfs_trans	*tp,
	struct xfs_buf		*bp)
{
	struct xfs_buf_log_item	*bli = bp->b_log_item;

	ASSERT(bli != NULL);

	bli->bli_flags &= ~(XFS_BLI_DIRTY | XFS_BLI_ORDERED |
			    XFS_BLI_LOGGED | XFS_BLI_STALE);
	clear_bit(XFS_LI_DIRTY, &bli->bli_item.li_flags);

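	/*
	 * The transaction may hold more than one attachment to this buffer,
	 * so keep detaching until the log item is completely gone.
	 */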
	while (bp->b_log_item != NULL)
		xfs_trans_bdetach(tp, bp);
}