1/* SPDX-License-Identifier: GPL-2.0 */
2/*
3 * Copyright (c) 2021-2024 Oracle.  All Rights Reserved.
4 * Author: Darrick J. Wong <djwong@kernel.org>
5 */
6#include "xfs.h"
7#include "xfs_fs.h"
8#include "xfs_shared.h"
9#include "xfs_format.h"
10#include "xfs_log_format.h"
11#include "xfs_trans_resv.h"
12#include "xfs_mount.h"
13#include "xfs_trans.h"
14#include "xfs_btree.h"
15#include "xfs_error.h"
16#include "xfs_buf_mem.h"
17#include "xfs_btree_mem.h"
18#include "xfs_ag.h"
19#include "xfs_buf_item.h"
20#include "xfs_trace.h"
21
22/* Set the root of an in-memory btree. */
23void
24xfbtree_set_root(
25	struct xfs_btree_cur		*cur,
26	const union xfs_btree_ptr	*ptr,
27	int				inc)
28{
29	ASSERT(cur->bc_ops->type == XFS_BTREE_TYPE_MEM);
30
31	cur->bc_mem.xfbtree->root = *ptr;
32	cur->bc_mem.xfbtree->nlevels += inc;
33}
34
35/* Initialize a pointer from the in-memory btree header. */
36void
37xfbtree_init_ptr_from_cur(
38	struct xfs_btree_cur		*cur,
39	union xfs_btree_ptr		*ptr)
40{
41	ASSERT(cur->bc_ops->type == XFS_BTREE_TYPE_MEM);
42
43	*ptr = cur->bc_mem.xfbtree->root;
44}
45
46/* Duplicate an in-memory btree cursor. */
47struct xfs_btree_cur *
48xfbtree_dup_cursor(
49	struct xfs_btree_cur		*cur)
50{
51	struct xfs_btree_cur		*ncur;
52
53	ASSERT(cur->bc_ops->type == XFS_BTREE_TYPE_MEM);
54
55	ncur = xfs_btree_alloc_cursor(cur->bc_mp, cur->bc_tp, cur->bc_ops,
56			cur->bc_maxlevels, cur->bc_cache);
57	ncur->bc_flags = cur->bc_flags;
58	ncur->bc_nlevels = cur->bc_nlevels;
59	ncur->bc_mem.xfbtree = cur->bc_mem.xfbtree;
60
61	if (cur->bc_mem.pag)
62		ncur->bc_mem.pag = xfs_perag_hold(cur->bc_mem.pag);
63
64	return ncur;
65}
66
67/* Close the btree xfile and release all resources. */
68void
69xfbtree_destroy(
70	struct xfbtree		*xfbt)
71{
72	xfs_buftarg_drain(xfbt->target);
73}
74
75/* Compute the number of bytes available for records. */
76static inline unsigned int
77xfbtree_rec_bytes(
78	struct xfs_mount		*mp,
79	const struct xfs_btree_ops	*ops)
80{
81	return XMBUF_BLOCKSIZE - XFS_BTREE_LBLOCK_CRC_LEN;
82}
83
84/* Initialize an empty leaf block as the btree root. */
85STATIC int
86xfbtree_init_leaf_block(
87	struct xfs_mount		*mp,
88	struct xfbtree			*xfbt,
89	const struct xfs_btree_ops	*ops)
90{
91	struct xfs_buf			*bp;
92	xfbno_t				bno = xfbt->highest_bno++;
93	int				error;
94
95	error = xfs_buf_get(xfbt->target, xfbno_to_daddr(bno), XFBNO_BBSIZE,
96			&bp);
97	if (error)
98		return error;
99
100	trace_xfbtree_create_root_buf(xfbt, bp);
101
102	bp->b_ops = ops->buf_ops;
103	xfs_btree_init_buf(mp, bp, ops, 0, 0, xfbt->owner);
104	xfs_buf_relse(bp);
105
106	xfbt->root.l = cpu_to_be64(bno);
107	return 0;
108}
109
110/*
111 * Create an in-memory btree root that can be used with the given xmbuf.
112 * Callers must set xfbt->owner.
113 */
114int
115xfbtree_init(
116	struct xfs_mount		*mp,
117	struct xfbtree			*xfbt,
118	struct xfs_buftarg		*btp,
119	const struct xfs_btree_ops	*ops)
120{
121	unsigned int			blocklen = xfbtree_rec_bytes(mp, ops);
122	unsigned int			keyptr_len;
123	int				error;
124
125	/* Requires a long-format CRC-format btree */
126	if (!xfs_has_crc(mp)) {
127		ASSERT(xfs_has_crc(mp));
128		return -EINVAL;
129	}
130	if (ops->ptr_len != XFS_BTREE_LONG_PTR_LEN) {
131		ASSERT(ops->ptr_len == XFS_BTREE_LONG_PTR_LEN);
132		return -EINVAL;
133	}
134
135	memset(xfbt, 0, sizeof(*xfbt));
136	xfbt->target = btp;
137
138	/* Set up min/maxrecs for this btree. */
139	keyptr_len = ops->key_len + sizeof(__be64);
140	xfbt->maxrecs[0] = blocklen / ops->rec_len;
141	xfbt->maxrecs[1] = blocklen / keyptr_len;
142	xfbt->minrecs[0] = xfbt->maxrecs[0] / 2;
143	xfbt->minrecs[1] = xfbt->maxrecs[1] / 2;
144	xfbt->highest_bno = 0;
145	xfbt->nlevels = 1;
146
147	/* Initialize the empty btree. */
148	error = xfbtree_init_leaf_block(mp, xfbt, ops);
149	if (error)
150		goto err_freesp;
151
152	trace_xfbtree_init(mp, xfbt, ops);
153
154	return 0;
155
156err_freesp:
157	xfs_buftarg_drain(xfbt->target);
158	return error;
159}
160
161/* Allocate a block to our in-memory btree. */
162int
163xfbtree_alloc_block(
164	struct xfs_btree_cur		*cur,
165	const union xfs_btree_ptr	*start,
166	union xfs_btree_ptr		*new,
167	int				*stat)
168{
169	struct xfbtree			*xfbt = cur->bc_mem.xfbtree;
170	xfbno_t				bno = xfbt->highest_bno++;
171
172	ASSERT(cur->bc_ops->type == XFS_BTREE_TYPE_MEM);
173
174	trace_xfbtree_alloc_block(xfbt, cur, bno);
175
176	/* Fail if the block address exceeds the maximum for the buftarg. */
177	if (!xfbtree_verify_bno(xfbt, bno)) {
178		ASSERT(xfbtree_verify_bno(xfbt, bno));
179		*stat = 0;
180		return 0;
181	}
182
183	new->l = cpu_to_be64(bno);
184	*stat = 1;
185	return 0;
186}
187
188/* Free a block from our in-memory btree. */
189int
190xfbtree_free_block(
191	struct xfs_btree_cur	*cur,
192	struct xfs_buf		*bp)
193{
194	struct xfbtree		*xfbt = cur->bc_mem.xfbtree;
195	xfs_daddr_t		daddr = xfs_buf_daddr(bp);
196	xfbno_t			bno = xfs_daddr_to_xfbno(daddr);
197
198	ASSERT(cur->bc_ops->type == XFS_BTREE_TYPE_MEM);
199
200	trace_xfbtree_free_block(xfbt, cur, bno);
201
202	if (bno + 1 == xfbt->highest_bno)
203		xfbt->highest_bno--;
204
205	return 0;
206}
207
208/* Return the minimum number of records for a btree block. */
209int
210xfbtree_get_minrecs(
211	struct xfs_btree_cur	*cur,
212	int			level)
213{
214	struct xfbtree		*xfbt = cur->bc_mem.xfbtree;
215
216	return xfbt->minrecs[level != 0];
217}
218
219/* Return the maximum number of records for a btree block. */
220int
221xfbtree_get_maxrecs(
222	struct xfs_btree_cur	*cur,
223	int			level)
224{
225	struct xfbtree		*xfbt = cur->bc_mem.xfbtree;
226
227	return xfbt->maxrecs[level != 0];
228}
229
230/* If this log item is a buffer item that came from the xfbtree, return it. */
231static inline struct xfs_buf *
232xfbtree_buf_match(
233	struct xfbtree			*xfbt,
234	const struct xfs_log_item	*lip)
235{
236	const struct xfs_buf_log_item	*bli;
237	struct xfs_buf			*bp;
238
239	if (lip->li_type != XFS_LI_BUF)
240		return NULL;
241
242	bli = container_of(lip, struct xfs_buf_log_item, bli_item);
243	bp = bli->bli_buf;
244	if (bp->b_target != xfbt->target)
245		return NULL;
246
247	return bp;
248}
249
250/*
251 * Commit changes to the incore btree immediately by writing all dirty xfbtree
252 * buffers to the backing xfile.  This detaches all xfbtree buffers from the
253 * transaction, even on failure.  The buffer locks are dropped between the
254 * delwri queue and submit, so the caller must synchronize btree access.
255 *
256 * Normally we'd let the buffers commit with the transaction and get written to
257 * the xfile via the log, but online repair stages ephemeral btrees in memory
258 * and uses the btree_staging functions to write new btrees to disk atomically.
259 * The in-memory btree (and its backing store) are discarded at the end of the
260 * repair phase, which means that xfbtree buffers cannot commit with the rest
261 * of a transaction.
262 *
263 * In other words, online repair only needs the transaction to collect buffer
264 * pointers and to avoid buffer deadlocks, not to guarantee consistency of
265 * updates.
266 */
267int
268xfbtree_trans_commit(
269	struct xfbtree		*xfbt,
270	struct xfs_trans	*tp)
271{
272	struct xfs_log_item	*lip, *n;
273	bool			tp_dirty = false;
274	int			error = 0;
275
276	/*
277	 * For each xfbtree buffer attached to the transaction, write the dirty
278	 * buffers to the xfile and release them.
279	 */
280	list_for_each_entry_safe(lip, n, &tp->t_items, li_trans) {
281		struct xfs_buf	*bp = xfbtree_buf_match(xfbt, lip);
282
283		if (!bp) {
284			if (test_bit(XFS_LI_DIRTY, &lip->li_flags))
285				tp_dirty |= true;
286			continue;
287		}
288
289		trace_xfbtree_trans_commit_buf(xfbt, bp);
290
291		xmbuf_trans_bdetach(tp, bp);
292
293		/*
294		 * If the buffer fails verification, note the failure but
295		 * continue walking the transaction items so that we remove all
296		 * ephemeral btree buffers.
297		 */
298		if (!error)
299			error = xmbuf_finalize(bp);
300
301		xfs_buf_relse(bp);
302	}
303
304	/*
305	 * Reset the transaction's dirty flag to reflect the dirty state of the
306	 * log items that are still attached.
307	 */
308	tp->t_flags = (tp->t_flags & ~XFS_TRANS_DIRTY) |
309			(tp_dirty ? XFS_TRANS_DIRTY : 0);
310
311	return error;
312}
313
314/*
315 * Cancel changes to the incore btree by detaching all the xfbtree buffers.
316 * Changes are not undone, so callers must not access the btree ever again.
317 */
318void
319xfbtree_trans_cancel(
320	struct xfbtree		*xfbt,
321	struct xfs_trans	*tp)
322{
323	struct xfs_log_item	*lip, *n;
324	bool			tp_dirty = false;
325
326	list_for_each_entry_safe(lip, n, &tp->t_items, li_trans) {
327		struct xfs_buf	*bp = xfbtree_buf_match(xfbt, lip);
328
329		if (!bp) {
330			if (test_bit(XFS_LI_DIRTY, &lip->li_flags))
331				tp_dirty |= true;
332			continue;
333		}
334
335		trace_xfbtree_trans_cancel_buf(xfbt, bp);
336
337		xmbuf_trans_bdetach(tp, bp);
338		xfs_buf_relse(bp);
339	}
340
341	/*
342	 * Reset the transaction's dirty flag to reflect the dirty state of the
343	 * log items that are still attached.
344	 */
345	tp->t_flags = (tp->t_flags & ~XFS_TRANS_DIRTY) |
346			(tp_dirty ? XFS_TRANS_DIRTY : 0);
347}
348