1/*
2 * CDDL HEADER START
3 *
4 * The contents of this file are subject to the terms of the
5 * Common Development and Distribution License (the "License").
6 * You may not use this file except in compliance with the License.
7 *
8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9 * or https://opensource.org/licenses/CDDL-1.0.
10 * See the License for the specific language governing permissions
11 * and limitations under the License.
12 *
13 * When distributing Covered Code, include this CDDL HEADER in each
14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15 * If applicable, add the following below this CDDL HEADER, with the
16 * fields enclosed by brackets "[]" replaced with your own identifying
17 * information: Portions Copyright [yyyy] [name of copyright owner]
18 *
19 * CDDL HEADER END
20 */
21/*
22 * Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved.
23 * Copyright (c) 2012, 2016 by Delphix. All rights reserved.
24 * Copyright (c) 2013 by Saso Kiselkov. All rights reserved.
25 * Copyright (c) 2019, Allan Jude
26 * Copyright (c) 2019, Klara Inc.
27 */
28
29#ifndef	_SYS_ARC_H
30#define	_SYS_ARC_H
31
32#include <sys/zfs_context.h>
33
34#ifdef	__cplusplus
35extern "C" {
36#endif
37
38#include <sys/zio.h>
39#include <sys/dmu.h>
40#include <sys/spa.h>
41#include <sys/zfs_refcount.h>
42
/*
 * Sentinel value: arc_flush() hands this to arc_evict_state() to tell it
 * to evict every available buffer from the arc state being passed in.
 */
#define	ARC_EVICT_ALL	UINT64_MAX
48
/*
 * Lower bound for zfs_arc_max.  ZFS gets very unhappy when the maximum ARC
 * size is smaller than the maximum block size and a larger block is
 * written, so, to leave some safety margin, the minimum is limited to the
 * maximum transaction size.
 */
#define	MIN_ARC_MAX	DMU_MAX_ACCESS
55
/*
 * Store size x into (hdr)->b_lsize, scaled down by SPA_MINBLOCKSHIFT.
 * x must be a multiple of (1 << SPA_MINBLOCKSHIFT); this is ASSERTed.
 *
 * x is parenthesized at every use, matching HDR_SET_PSIZE(), so that an
 * argument which itself expands to a compound expression is handed to
 * IS_P2ALIGNED() as a single argument.  x appears twice in the expansion,
 * so avoid arguments with side effects.
 */
#define	HDR_SET_LSIZE(hdr, x) do { \
	ASSERT(IS_P2ALIGNED((x), 1U << SPA_MINBLOCKSHIFT)); \
	(hdr)->b_lsize = ((x) >> SPA_MINBLOCKSHIFT); \
} while (0)
60
/*
 * Store size x into (hdr)->b_psize, scaled down by SPA_MINBLOCKSHIFT.
 * x must be a multiple of (1 << SPA_MINBLOCKSHIFT); this is ASSERTed.
 * x appears twice in the expansion, so avoid arguments with side effects.
 */
#define	HDR_SET_PSIZE(hdr, x) do { \
	ASSERT(IS_P2ALIGNED((x), 1U << SPA_MINBLOCKSHIFT)); \
	(hdr)->b_psize = ((x) >> SPA_MINBLOCKSHIFT); \
} while (0)
65
/*
 * Counterparts of HDR_SET_LSIZE() / HDR_SET_PSIZE(): scale the stored
 * b_lsize / b_psize value back up by SPA_MINBLOCKSHIFT.
 */
#define	HDR_GET_LSIZE(hdr)	((hdr)->b_lsize << SPA_MINBLOCKSHIFT)
#define	HDR_GET_PSIZE(hdr)	((hdr)->b_psize << SPA_MINBLOCKSHIFT)
68
/*
 * Forward typedefs for the ARC structure types.  struct arc_buf and
 * struct arc_prune are defined further down in this header; struct
 * arc_buf_hdr is only named here.
 */
typedef struct arc_buf_hdr arc_buf_hdr_t;
typedef struct arc_buf arc_buf_t;
typedef struct arc_prune arc_prune_t;
72
73/*
74 * Because the ARC can store encrypted data, errors (not due to bugs) may arise
75 * while transforming data into its desired format - specifically, when
76 * decrypting, the key may not be present, or the HMAC may not be correct
77 * which signifies deliberate tampering with the on-disk state
78 * (assuming that the checksum was correct). If any error occurs, the "buf"
79 * parameter will be NULL.
80 */
81typedef void arc_read_done_func_t(zio_t *zio, const zbookmark_phys_t *zb,
82    const blkptr_t *bp, arc_buf_t *buf, void *priv);
83typedef void arc_write_done_func_t(zio_t *zio, arc_buf_t *buf, void *priv);
84typedef void arc_prune_func_t(uint64_t bytes, void *priv);
85
86/* Shared module parameters */
87extern uint_t zfs_arc_average_blocksize;
88extern int l2arc_exclude_special;
89
90/* generic arc_done_func_t's which you can use */
91arc_read_done_func_t arc_bcopy_func;
92arc_read_done_func_t arc_getbuf_func;
93
94/* generic arc_prune_func_t wrapper for callbacks */
95struct arc_prune {
96	arc_prune_func_t	*p_pfunc;
97	void			*p_private;
98	uint64_t		p_adjust;
99	list_node_t		p_node;
100	zfs_refcount_t		p_refcnt;
101};
102
/* Selects which kind of buffers may be evicted. */
typedef enum arc_strategy {
	/* Evict only meta data buffers */
	ARC_STRATEGY_META_ONLY = 0,
	/* Evict data buffers if needed */
	ARC_STRATEGY_META_BALANCED = 1,
} arc_strategy_t;
107
/*
 * Flag bits used by the ARC.  The first group may be passed in by external
 * consumers; the later groups are private to the ARC, except for
 * ARC_FLAG_CACHED_ONLY and ARC_FLAG_NO_BUF, which modify an arc_read().
 * Bits 24 through 30 are placeholders for the compression mode (see the
 * comment above ARC_FLAG_COMPRESS_0).
 */
typedef enum arc_flags {
	/*
	 * Public flags that can be passed into the ARC by external consumers.
	 */
	ARC_FLAG_WAIT			= 1 << 0,	/* perform sync I/O */
	ARC_FLAG_NOWAIT			= 1 << 1,	/* perform async I/O */
	ARC_FLAG_PREFETCH		= 1 << 2,	/* I/O is a prefetch */
	ARC_FLAG_CACHED			= 1 << 3,	/* I/O was in cache */
	ARC_FLAG_L2CACHE		= 1 << 4,	/* cache in L2ARC */
	ARC_FLAG_UNCACHED		= 1 << 5,	/* evict after use */
	ARC_FLAG_PRESCIENT_PREFETCH	= 1 << 6,	/* long min lifespan */

	/*
	 * Private ARC flags.  These flags are private ARC only flags that
	 * will show up in b_flags in the arc_buf_hdr_t. These flags should
	 * only be set by ARC code.
	 */
	ARC_FLAG_IN_HASH_TABLE		= 1 << 7,	/* buffer is hashed */
	ARC_FLAG_IO_IN_PROGRESS		= 1 << 8,	/* I/O in progress */
	ARC_FLAG_IO_ERROR		= 1 << 9,	/* I/O failed for buf */
	ARC_FLAG_INDIRECT		= 1 << 10,	/* indirect block */
	/* Indicates that block was read with ASYNC priority. */
	ARC_FLAG_PRIO_ASYNC_READ	= 1 << 11,
	ARC_FLAG_L2_WRITING		= 1 << 12,	/* write in progress */
	ARC_FLAG_L2_EVICTED		= 1 << 13,	/* evicted during I/O */
	ARC_FLAG_L2_WRITE_HEAD		= 1 << 14,	/* head of write list */
	/*
	 * Encrypted or authenticated on disk (may be plaintext in memory).
	 * This header has b_crypt_hdr allocated. Does not include indirect
	 * blocks with checksums of MACs which will also have their X
	 * (encrypted) bit set in the bp.
	 */
	ARC_FLAG_PROTECTED		= 1 << 15,
	/* data has not been authenticated yet */
	ARC_FLAG_NOAUTH			= 1 << 16,
	/* indicates that the buffer contains metadata (otherwise, data) */
	ARC_FLAG_BUFC_METADATA		= 1 << 17,

	/* Flags specifying whether optional hdr struct fields are defined */
	ARC_FLAG_HAS_L1HDR		= 1 << 18,
	ARC_FLAG_HAS_L2HDR		= 1 << 19,

	/*
	 * Indicates the arc_buf_hdr_t's b_pdata matches the on-disk data.
	 * This allows the l2arc to use the blkptr's checksum to verify
	 * the data without having to store the checksum in the hdr.
	 */
	ARC_FLAG_COMPRESSED_ARC		= 1 << 20,
	ARC_FLAG_SHARED_DATA		= 1 << 21,

	/*
	 * Fail this arc_read() (with ENOENT) if the data is not already present
	 * in cache.
	 */
	ARC_FLAG_CACHED_ONLY		= 1 << 22,

	/*
	 * Don't instantiate an arc_buf_t for arc_read_done.
	 */
	ARC_FLAG_NO_BUF			= 1 << 23,

	/*
	 * The arc buffer's compression mode is stored in the top 7 bits of the
	 * flags field, so these dummy flags are included so that MDB can
	 * interpret the enum properly.
	 */
	ARC_FLAG_COMPRESS_0		= 1 << 24,
	ARC_FLAG_COMPRESS_1		= 1 << 25,
	ARC_FLAG_COMPRESS_2		= 1 << 26,
	ARC_FLAG_COMPRESS_3		= 1 << 27,
	ARC_FLAG_COMPRESS_4		= 1 << 28,
	ARC_FLAG_COMPRESS_5		= 1 << 29,
	ARC_FLAG_COMPRESS_6		= 1 << 30
} arc_flags_t;
184
/* Flag bits for an arc_buf_t; this is the type of its b_flags member. */
typedef enum arc_buf_flags {
	ARC_BUF_FLAG_SHARED = 1 << 0,
	ARC_BUF_FLAG_COMPRESSED = 1 << 1,
	/*
	 * Indicates whether this arc_buf_t is encrypted, regardless of
	 * state on-disk.
	 */
	ARC_BUF_FLAG_ENCRYPTED = 1 << 2
} arc_buf_flags_t;
194
195struct arc_buf {
196	arc_buf_hdr_t		*b_hdr;
197	arc_buf_t		*b_next;
198	void			*b_data;
199	arc_buf_flags_t		b_flags;
200};
201
/*
 * What a buffer contains.  ARC_BUFC_NUMTYPES comes last and therefore
 * equals the number of content types.
 */
typedef enum arc_buf_contents {
	/* buffer contains data */
	ARC_BUFC_DATA,
	/* buffer contains metadata */
	ARC_BUFC_METADATA,
	ARC_BUFC_NUMTYPES
} arc_buf_contents_t;
207
/*
 * Categories into which arc_size is broken down.  These breakdowns exist
 * for kstat only.  ARC_SPACE_NUMTYPES comes last and therefore equals the
 * number of categories.
 */
typedef enum arc_space_type {
	ARC_SPACE_DATA,
	ARC_SPACE_META,

	ARC_SPACE_HDRS,
	ARC_SPACE_L2HDRS,

	ARC_SPACE_DBUF,
	ARC_SPACE_DNODE,
	ARC_SPACE_BONUS,

	ARC_SPACE_ABD_CHUNK_WASTE,

	ARC_SPACE_NUMTYPES
} arc_space_type_t;
222
/*
 * Identifiers for the ARC states.  ARC_STATE_NUMTYPES comes last and
 * therefore equals the number of states.
 */
typedef enum arc_state_type {
	ARC_STATE_ANON,

	ARC_STATE_MRU,
	ARC_STATE_MRU_GHOST,

	ARC_STATE_MFU,
	ARC_STATE_MFU_GHOST,

	ARC_STATE_L2C_ONLY,
	ARC_STATE_UNCACHED,

	ARC_STATE_NUMTYPES
} arc_state_type_t;
233
234typedef struct arc_buf_info {
235	arc_state_type_t	abi_state_type;
236	arc_buf_contents_t	abi_state_contents;
237	uint32_t		abi_flags;
238	uint32_t		abi_bufcnt;
239	uint64_t		abi_size;
240	uint64_t		abi_spa;
241	uint64_t		abi_access;
242	uint32_t		abi_mru_hits;
243	uint32_t		abi_mru_ghost_hits;
244	uint32_t		abi_mfu_hits;
245	uint32_t		abi_mfu_ghost_hits;
246	uint32_t		abi_l2arc_hits;
247	uint32_t		abi_holds;
248	uint64_t		abi_l2arc_dattr;
249	uint64_t		abi_l2arc_asize;
250	enum zio_compress	abi_l2arc_compress;
251} arc_buf_info_t;
252
253void arc_space_consume(uint64_t space, arc_space_type_t type);
254void arc_space_return(uint64_t space, arc_space_type_t type);
255boolean_t arc_is_metadata(arc_buf_t *buf);
256boolean_t arc_is_encrypted(arc_buf_t *buf);
257boolean_t arc_is_unauthenticated(arc_buf_t *buf);
258enum zio_compress arc_get_compression(arc_buf_t *buf);
259void arc_get_raw_params(arc_buf_t *buf, boolean_t *byteorder, uint8_t *salt,
260    uint8_t *iv, uint8_t *mac);
261int arc_untransform(arc_buf_t *buf, spa_t *spa, const zbookmark_phys_t *zb,
262    boolean_t in_place);
263void arc_convert_to_raw(arc_buf_t *buf, uint64_t dsobj, boolean_t byteorder,
264    dmu_object_type_t ot, const uint8_t *salt, const uint8_t *iv,
265    const uint8_t *mac);
266arc_buf_t *arc_alloc_buf(spa_t *spa, const void *tag, arc_buf_contents_t type,
267    int32_t size);
268arc_buf_t *arc_alloc_compressed_buf(spa_t *spa, const void *tag,
269    uint64_t psize, uint64_t lsize, enum zio_compress compression_type,
270    uint8_t complevel);
271arc_buf_t *arc_alloc_raw_buf(spa_t *spa, const void *tag, uint64_t dsobj,
272    boolean_t byteorder, const uint8_t *salt, const uint8_t *iv,
273    const uint8_t *mac, dmu_object_type_t ot, uint64_t psize, uint64_t lsize,
274    enum zio_compress compression_type, uint8_t complevel);
275uint8_t arc_get_complevel(arc_buf_t *buf);
276arc_buf_t *arc_loan_buf(spa_t *spa, boolean_t is_metadata, int size);
277arc_buf_t *arc_loan_compressed_buf(spa_t *spa, uint64_t psize, uint64_t lsize,
278    enum zio_compress compression_type, uint8_t complevel);
279arc_buf_t *arc_loan_raw_buf(spa_t *spa, uint64_t dsobj, boolean_t byteorder,
280    const uint8_t *salt, const uint8_t *iv, const uint8_t *mac,
281    dmu_object_type_t ot, uint64_t psize, uint64_t lsize,
282    enum zio_compress compression_type, uint8_t complevel);
283void arc_return_buf(arc_buf_t *buf, const void *tag);
284void arc_loan_inuse_buf(arc_buf_t *buf, const void *tag);
285void arc_buf_destroy(arc_buf_t *buf, const void *tag);
286void arc_buf_info(arc_buf_t *buf, arc_buf_info_t *abi, int state_index);
287uint64_t arc_buf_size(arc_buf_t *buf);
288uint64_t arc_buf_lsize(arc_buf_t *buf);
289void arc_buf_access(arc_buf_t *buf);
290void arc_release(arc_buf_t *buf, const void *tag);
291int arc_released(arc_buf_t *buf);
292void arc_buf_sigsegv(int sig, siginfo_t *si, void *unused);
293void arc_buf_freeze(arc_buf_t *buf);
294void arc_buf_thaw(arc_buf_t *buf);
/*
 * arc_referenced() is a real function only when ZFS_DEBUG is defined.
 * Otherwise it is a macro that always yields 0; the sizeof keeps the
 * argument "used" without evaluating it.
 */
#ifndef ZFS_DEBUG
#define	arc_referenced(buf) ((void) sizeof (buf), 0)
#else
int arc_referenced(arc_buf_t *buf);
#endif
300
301int arc_read(zio_t *pio, spa_t *spa, const blkptr_t *bp,
302    arc_read_done_func_t *done, void *priv, zio_priority_t priority,
303    int flags, arc_flags_t *arc_flags, const zbookmark_phys_t *zb);
304zio_t *arc_write(zio_t *pio, spa_t *spa, uint64_t txg, blkptr_t *bp,
305    arc_buf_t *buf, boolean_t uncached, boolean_t l2arc, const zio_prop_t *zp,
306    arc_write_done_func_t *ready, arc_write_done_func_t *child_ready,
307    arc_write_done_func_t *done, void *priv, zio_priority_t priority,
308    int zio_flags, const zbookmark_phys_t *zb);
309
310arc_prune_t *arc_add_prune_callback(arc_prune_func_t *func, void *priv);
311void arc_remove_prune_callback(arc_prune_t *p);
312void arc_freed(spa_t *spa, const blkptr_t *bp);
313
314void arc_flush(spa_t *spa, boolean_t retry);
315void arc_tempreserve_clear(uint64_t reserve);
316int arc_tempreserve_space(spa_t *spa, uint64_t reserve, uint64_t txg);
317
/*
 * ARC sizing queries, limit setup, and module init/fini.
 *
 * NOTE(review): arc_set_limits() previously had an unnamed parameter; the
 * name allmem mirrors arc_default_max() -- confirm it matches the
 * definition.
 */
uint64_t arc_all_memory(void);
uint64_t arc_default_max(uint64_t min, uint64_t allmem);
uint64_t arc_target_bytes(void);
void arc_set_limits(uint64_t allmem);
void arc_init(void);
void arc_fini(void);
324
325/*
326 * Level 2 ARC
327 */
328
329void l2arc_add_vdev(spa_t *spa, vdev_t *vd);
330void l2arc_remove_vdev(vdev_t *vd);
331boolean_t l2arc_vdev_present(vdev_t *vd);
332void l2arc_rebuild_vdev(vdev_t *vd, boolean_t reopen);
333boolean_t l2arc_range_check_overlap(uint64_t bottom, uint64_t top,
334    uint64_t check);
335void l2arc_init(void);
336void l2arc_fini(void);
337void l2arc_start(void);
338void l2arc_stop(void);
339void l2arc_spa_rebuild_start(spa_t *spa);
340
341#ifndef _KERNEL
342extern boolean_t arc_watch;
343#endif
344
345#ifdef	__cplusplus
346}
347#endif
348
349#endif /* _SYS_ARC_H */
350