// SPDX-License-Identifier: GPL-2.0-or-later
/*
 * Copyright (c) 2023-2024 Oracle. All Rights Reserved.
 * Author: Darrick J. Wong <djwong@kernel.org>
 */
#include "xfs.h"
#include "xfs_fs.h"
#include "xfs_buf.h"
#include "xfs_buf_mem.h"
#include "xfs_trace.h"
#include <linux/shmem_fs.h>
#include "xfs_log_format.h"
#include "xfs_trans.h"
#include "xfs_buf_item.h"
#include "xfs_error.h"

/*
 * Buffer Cache for In-Memory Files
 * ================================
 *
 * Online fsck wants to create ephemeral ordered recordsets.  The existing
 * btree infrastructure can do this, but we need the buffer cache to target
 * memory instead of block devices.
 *
 * When CONFIG_TMPFS=y, shmemfs is enough of a filesystem to meet those
 * requirements.  Therefore, the xmbuf mechanism uses an unlinked shmem file to
 * store our staging data.  This file is not installed in the file descriptor
 * table so that user programs cannot access the data, which means that the
 * xmbuf must be freed with xmbuf_destroy.
 *
 * xmbufs assume that the caller will handle all required concurrency
 * management; standard vfs locks (freezer and inode) are not taken.  Reads
 * and writes are satisfied directly from the page cache.
 *
 * The only supported block size is PAGE_SIZE, and we cannot use highmem.
 */

/*
 * shmem files used to back an in-memory buffer cache must not be exposed to
 * userspace.  Upper layers must coordinate access to the one handle returned
 * by the constructor, so establish a separate lock class for xmbufs to avoid
 * confusing lockdep.
 */
static struct lock_class_key xmbuf_i_mutex_key;

/*
 * Allocate a buffer cache target for a memory-backed file and set up the
 * buffer target.
 */
int
xmbuf_alloc(
	struct xfs_mount	*mp,
	const char		*descr,
	struct xfs_buftarg	**btpp)
{
	struct file		*file;
	struct inode		*inode;
	struct xfs_buftarg	*btp;
	int			error;

	btp = kzalloc(struct_size(btp, bt_cache, 1), GFP_KERNEL);
	if (!btp)
		return -ENOMEM;

	/*
	 * Unlinked shmem file named after @descr; it is never installed in
	 * anyone's file descriptor table, so userspace cannot reach it.
	 */
	file = shmem_kernel_file_setup(descr, 0, 0);
	if (IS_ERR(file)) {
		error = PTR_ERR(file);
		goto out_free_btp;
	}
	inode = file_inode(file);

	/* private file, private locking */
	lockdep_set_class(&inode->i_rwsem, &xmbuf_i_mutex_key);

	/*
	 * We don't want to bother with kmapping data during repair, so don't
	 * allow highmem pages to back this mapping.
	 */
	mapping_set_gfp_mask(inode->i_mapping, GFP_KERNEL);

	/* ensure all writes are below EOF to avoid pagecache zeroing */
	i_size_write(inode, inode->i_sb->s_maxbytes);

	error = xfs_buf_cache_init(btp->bt_cache);
	if (error)
		goto out_file;

	/* Initialize buffer target */
	btp->bt_mount = mp;
	btp->bt_dev = (dev_t)-1U;	/* in-memory target has no real dev_t */
	btp->bt_bdev = NULL;		/* in-memory buftargs have no bdev */
	btp->bt_file = file;
	btp->bt_meta_sectorsize = XMBUF_BLOCKSIZE;
	btp->bt_meta_sectormask = XMBUF_BLOCKSIZE - 1;

	error = xfs_init_buftarg(btp, XMBUF_BLOCKSIZE, descr);
	if (error)
		goto out_bcache;

	trace_xmbuf_create(btp);

	/* @btpp is set only on success; the caller releases it via xmbuf_free. */
	*btpp = btp;
	return 0;

	/* Error unwind: release resources in the reverse order of acquisition. */
out_bcache:
	xfs_buf_cache_destroy(btp->bt_cache);
out_file:
	fput(file);
out_free_btp:
	kfree(btp);
	return error;
}

/* Free a buffer cache target for a memory-backed buffer cache. */
void
xmbuf_free(
	struct xfs_buftarg	*btp)
{
	ASSERT(xfs_buftarg_is_mem(btp));
	/* All buffer I/O must have completed before teardown. */
	ASSERT(percpu_counter_sum(&btp->bt_io_count) == 0);

	trace_xmbuf_free(btp);

	/* Tear down in the reverse order of xmbuf_alloc. */
	xfs_destroy_buftarg(btp);
	xfs_buf_cache_destroy(btp->bt_cache);
	fput(btp->bt_file);
	kfree(btp);
}

/* Directly map a shmem page into the buffer cache.
*/ 131int 132xmbuf_map_page( 133 struct xfs_buf *bp) 134{ 135 struct inode *inode = file_inode(bp->b_target->bt_file); 136 struct folio *folio = NULL; 137 struct page *page; 138 loff_t pos = BBTOB(xfs_buf_daddr(bp)); 139 int error; 140 141 ASSERT(xfs_buftarg_is_mem(bp->b_target)); 142 143 if (bp->b_map_count != 1) 144 return -ENOMEM; 145 if (BBTOB(bp->b_length) != XMBUF_BLOCKSIZE) 146 return -ENOMEM; 147 if (offset_in_page(pos) != 0) { 148 ASSERT(offset_in_page(pos)); 149 return -ENOMEM; 150 } 151 152 error = shmem_get_folio(inode, pos >> PAGE_SHIFT, &folio, SGP_CACHE); 153 if (error) 154 return error; 155 156 if (filemap_check_wb_err(inode->i_mapping, 0)) { 157 folio_unlock(folio); 158 folio_put(folio); 159 return -EIO; 160 } 161 162 page = folio_file_page(folio, pos >> PAGE_SHIFT); 163 164 /* 165 * Mark the page dirty so that it won't be reclaimed once we drop the 166 * (potentially last) reference in xmbuf_unmap_page. 167 */ 168 set_page_dirty(page); 169 unlock_page(page); 170 171 bp->b_addr = page_address(page); 172 bp->b_pages = bp->b_page_array; 173 bp->b_pages[0] = page; 174 bp->b_page_count = 1; 175 return 0; 176} 177 178/* Unmap a shmem page that was mapped into the buffer cache. */ 179void 180xmbuf_unmap_page( 181 struct xfs_buf *bp) 182{ 183 struct page *page = bp->b_pages[0]; 184 185 ASSERT(xfs_buftarg_is_mem(bp->b_target)); 186 187 put_page(page); 188 189 bp->b_addr = NULL; 190 bp->b_pages[0] = NULL; 191 bp->b_pages = NULL; 192 bp->b_page_count = 0; 193} 194 195/* Is this a valid daddr within the buftarg? */ 196bool 197xmbuf_verify_daddr( 198 struct xfs_buftarg *btp, 199 xfs_daddr_t daddr) 200{ 201 struct inode *inode = file_inode(btp->bt_file); 202 203 ASSERT(xfs_buftarg_is_mem(btp)); 204 205 return daddr < (inode->i_sb->s_maxbytes >> BBSHIFT); 206} 207 208/* Discard the page backing this buffer. 
 */
static void
xmbuf_stale(
	struct xfs_buf		*bp)
{
	struct inode		*inode = file_inode(bp->b_target->bt_file);
	loff_t			pos;

	ASSERT(xfs_buftarg_is_mem(bp->b_target));

	/* Punch out the pagecache backing this buffer's block range. */
	pos = BBTOB(xfs_buf_daddr(bp));
	shmem_truncate_range(inode, pos, pos + BBTOB(bp->b_length) - 1);
}

/*
 * Finalize a buffer -- discard the backing page if it's stale, or run the
 * write verifier to detect problems.
 */
int
xmbuf_finalize(
	struct xfs_buf		*bp)
{
	xfs_failaddr_t		fa;
	int			error = 0;

	/* Stale buffers need no verification; just drop their backing page. */
	if (bp->b_flags & XBF_STALE) {
		xmbuf_stale(bp);
		return 0;
	}

	/*
	 * Although this btree is ephemeral, validate the buffer structure so
	 * that we can detect memory corruption errors and software bugs.
	 */
	fa = bp->b_ops->verify_struct(bp);
	if (fa) {
		error = -EFSCORRUPTED;
		xfs_verifier_error(bp, error, fa);
	}

	return error;
}

/*
 * Detach this xmbuf buffer from the transaction by any means necessary.
 * All buffers are direct-mapped, so they do not need bwrite.
 */
void
xmbuf_trans_bdetach(
	struct xfs_trans	*tp,
	struct xfs_buf		*bp)
{
	struct xfs_buf_log_item	*bli = bp->b_log_item;

	ASSERT(bli != NULL);

	/* Strip all dirty/ordered/logged/stale state from the log item. */
	bli->bli_flags &= ~(XFS_BLI_DIRTY | XFS_BLI_ORDERED |
			    XFS_BLI_LOGGED | XFS_BLI_STALE);
	clear_bit(XFS_LI_DIRTY, &bli->bli_item.li_flags);

	/* Keep detaching until the buf log item is fully gone. */
	while (bp->b_log_item != NULL)
		xfs_trans_bdetach(tp, bp);
}