1/*
2 * CDDL HEADER START
3 *
4 * The contents of this file are subject to the terms of the
5 * Common Development and Distribution License (the "License").
6 * You may not use this file except in compliance with the License.
7 *
8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9 * or https://opensource.org/licenses/CDDL-1.0.
10 * See the License for the specific language governing permissions
11 * and limitations under the License.
12 *
13 * When distributing Covered Code, include this CDDL HEADER in each
14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15 * If applicable, add the following below this CDDL HEADER, with the
16 * fields enclosed by brackets "[]" replaced with your own identifying
17 * information: Portions Copyright [yyyy] [name of copyright owner]
18 *
19 * CDDL HEADER END
20 */
21/*
22 * Copyright 2009 Sun Microsystems, Inc.  All rights reserved.
23 * Use is subject to license terms.
24 */
25
26/*
27 * Copyright (c) 2012, 2015 by Delphix. All rights reserved.
28 * Copyright (c) 2024, Klara Inc.
29 */
30
31#ifndef _ZIO_IMPL_H
32#define	_ZIO_IMPL_H
33
34#ifdef	__cplusplus
35extern "C" {
36#endif
37
38/*
39 * XXX -- Describe ZFS I/O pipeline here. Fill in as needed.
40 *
41 * The ZFS I/O pipeline is comprised of various stages which are defined
42 * in the zio_stage enum below. The individual stages are used to construct
43 * these basic I/O operations: Read, Write, Free, Claim, Flush and Trim.
44 *
45 * I/O operations: (XXX - provide detail for each of the operations)
46 *
47 * Read:
48 * Write:
49 * Free:
50 * Claim:
51 * Flush:
52 * Trim:
53 *
 * Although the most common pipelines are those used by the basic I/O
 * operations above, there are some helper pipelines (one could consider
 * them sub-pipelines) which are used internally by the ZIO module and are
 * explained below:
58 *
59 * Interlock Pipeline:
60 * The interlock pipeline is the most basic pipeline and is used by all
61 * of the I/O operations. The interlock pipeline does not perform any I/O
62 * and is used to coordinate the dependencies between I/Os that are being
63 * issued (i.e. the parent/child relationship).
64 *
65 * Vdev child Pipeline:
 * The vdev child pipeline is responsible for performing the physical I/O.
 * It is in this pipeline that I/Os are queued and possibly cached.
68 *
 * In addition to performing I/O, the pipeline is also responsible for
 * data transformations. The transformations performed are based on the
 * specific properties that the user may have selected, and they modify
 * the behavior of the pipeline. Examples of supported transformations are
 * compression, dedup, and nop writes. Transformations will either modify
 * the data or the pipeline. The list below further describes each of
 * the supported transformations:
76 *
77 * Compression:
 * ZFS supports five different flavors of compression -- gzip, lzjb, lz4, zle,
 * and zstd. Compression occurs as part of the write pipeline and is
 * performed in the ZIO_STAGE_WRITE_COMPRESS stage.
81 *
82 * Block cloning:
 * The block cloning functionality introduces the ZIO_STAGE_BRT_FREE stage,
 * which is called during a free pipeline. If the block is referenced in the
 * Block Reference Table (BRT) we will just decrease its reference counter
 * instead of actually freeing the block.
87 *
88 * Dedup:
89 * Dedup reads are handled by the ZIO_STAGE_DDT_READ_START and
90 * ZIO_STAGE_DDT_READ_DONE stages. These stages are added to an existing
91 * read pipeline if the dedup bit is set on the block pointer.
92 * Writing a dedup block is performed by the ZIO_STAGE_DDT_WRITE stage
93 * and added to a write pipeline if a user has enabled dedup on that
94 * particular dataset.
95 *
96 * NOP Write:
 * The NOP write feature is performed by the ZIO_STAGE_NOP_WRITE stage
 * and is added to an existing write pipeline if a cryptographically
 * secure checksum (e.g. SHA256) is enabled and compression is turned on.
100 * The NOP write stage will compare the checksums of the current data
101 * on-disk (level-0 blocks only) and the data that is currently being written.
102 * If the checksum values are identical then the pipeline is converted to
103 * an interlock pipeline skipping block allocation and bypassing the
104 * physical I/O.  The nop write feature can handle writes in either
105 * syncing or open context (i.e. zil writes) and as a result is mutually
106 * exclusive with dedup.
107 *
108 * Encryption:
 * Encryption and authentication are handled by the ZIO_STAGE_ENCRYPT stage.
 * This stage determines how the encryption metadata is stored in the bp.
 * Decryption and MAC verification are performed during zio_decrypt() as a
 * transform callback. Encryption is mutually exclusive with nopwrite, because
 * blocks with the same plaintext will be encrypted with different salts and
 * IVs (if dedup is off), and therefore have different ciphertexts. For dedup
 * blocks we deterministically generate the IV and salt by performing an HMAC
 * of the plaintext, which is computationally expensive, but allows us to keep
 * support for encrypted dedup. See the block comment in zio_crypt.c for
 * details.
119 */
120
/*
 * zio pipeline stage definitions
 *
 * Each stage occupies its own bit, so a set of stages (a pipeline) can be
 * expressed as the bitwise OR of the individual stage values.
 *
 * The six-character tag after each stage has one column per basic I/O
 * operation, in the order Read, Write, Free, Claim, Flush (X) and Trim.
 * A letter marks an operation the stage applies to; '-' marks one it
 * does not.
 */
enum zio_stage {
	ZIO_STAGE_OPEN			= (1 << 0),	/* RWFCXT */

	/* block pointer initialization, async issue, write compression */
	ZIO_STAGE_READ_BP_INIT		= (1 << 1),	/* R----- */
	ZIO_STAGE_WRITE_BP_INIT		= (1 << 2),	/* -W---- */
	ZIO_STAGE_FREE_BP_INIT		= (1 << 3),	/* --F--- */
	ZIO_STAGE_ISSUE_ASYNC		= (1 << 4),	/* -WF--T */
	ZIO_STAGE_WRITE_COMPRESS	= (1 << 5),	/* -W---- */

	/* encryption and checksum generation */
	ZIO_STAGE_ENCRYPT		= (1 << 6),	/* -W---- */
	ZIO_STAGE_CHECKSUM_GENERATE	= (1 << 7),	/* -W---- */

	/* NOP write */
	ZIO_STAGE_NOP_WRITE		= (1 << 8),	/* -W---- */

	/* block cloning (BRT) */
	ZIO_STAGE_BRT_FREE		= (1 << 9),	/* --F--- */

	/* dedup (DDT) */
	ZIO_STAGE_DDT_READ_START	= (1 << 10),	/* R----- */
	ZIO_STAGE_DDT_READ_DONE		= (1 << 11),	/* R----- */
	ZIO_STAGE_DDT_WRITE		= (1 << 12),	/* -W---- */
	ZIO_STAGE_DDT_FREE		= (1 << 13),	/* --F--- */

	/* gang */
	ZIO_STAGE_GANG_ASSEMBLE		= (1 << 14),	/* RWFC-- */
	ZIO_STAGE_GANG_ISSUE		= (1 << 15),	/* RWFC-- */

	/* DVA */
	ZIO_STAGE_DVA_THROTTLE		= (1 << 16),	/* -W---- */
	ZIO_STAGE_DVA_ALLOCATE		= (1 << 17),	/* -W---- */
	ZIO_STAGE_DVA_FREE		= (1 << 18),	/* --F--- */
	ZIO_STAGE_DVA_CLAIM		= (1 << 19),	/* ---C-- */

	ZIO_STAGE_READY			= (1 << 20),	/* RWFCXT */

	/* vdev I/O */
	ZIO_STAGE_VDEV_IO_START		= (1 << 21),	/* RW--XT */
	ZIO_STAGE_VDEV_IO_DONE		= (1 << 22),	/* RW--XT */
	ZIO_STAGE_VDEV_IO_ASSESS	= (1 << 23),	/* RW--XT */

	ZIO_STAGE_CHECKSUM_VERIFY	= (1 << 24),	/* R----- */

	ZIO_STAGE_DONE			= (1 << 25)	/* RWFCXT */
};
163
/* Root pipeline: contains the DONE stage and nothing else. */
#define	ZIO_ROOT_PIPELINE	(ZIO_STAGE_DONE)

/* Interlock stages: READY and DONE only; no I/O stage is included. */
#define	ZIO_INTERLOCK_STAGES	(ZIO_STAGE_READY | ZIO_STAGE_DONE)

/* The interlock pipeline is exactly the set of interlock stages. */
#define	ZIO_INTERLOCK_PIPELINE	(ZIO_INTERLOCK_STAGES)
173
/* The three vdev I/O stages: start, done and assess. */
#define	ZIO_VDEV_IO_STAGES	\
	(ZIO_STAGE_VDEV_IO_START | ZIO_STAGE_VDEV_IO_DONE | \
	ZIO_STAGE_VDEV_IO_ASSESS)

/*
 * Vdev child pipeline: the vdev I/O stages followed by DONE.  Note that
 * it includes DONE but not READY.
 */
#define	ZIO_VDEV_CHILD_PIPELINE	(ZIO_STAGE_DONE | ZIO_VDEV_IO_STAGES)
182
/*
 * Stages shared by the read pipelines: the interlock stages, the vdev
 * I/O stages and checksum verification.
 */
#define	ZIO_READ_COMMON_STAGES	\
	(ZIO_INTERLOCK_STAGES | ZIO_VDEV_IO_STAGES | \
	ZIO_STAGE_CHECKSUM_VERIFY)

/* Physical read: the common read stages with nothing added. */
#define	ZIO_READ_PHYS_PIPELINE	(ZIO_READ_COMMON_STAGES)

/* Read: the common read stages plus read BP initialization. */
#define	ZIO_READ_PIPELINE	\
	(ZIO_STAGE_READ_BP_INIT | ZIO_READ_COMMON_STAGES)

/* DDT child read: the same stage set as ZIO_READ_PHYS_PIPELINE. */
#define	ZIO_DDT_CHILD_READ_PIPELINE	(ZIO_READ_COMMON_STAGES)

/*
 * DDT read: the interlock stages, read BP initialization and the two DDT
 * read stages.  It contains neither the vdev I/O stages nor checksum
 * verification.
 */
#define	ZIO_DDT_READ_PIPELINE	\
	(ZIO_INTERLOCK_STAGES | ZIO_STAGE_READ_BP_INIT | \
	ZIO_STAGE_DDT_READ_START | ZIO_STAGE_DDT_READ_DONE)
203
/*
 * Stages shared by the write pipelines: the interlock stages, the vdev
 * I/O stages, the ISSUE_ASYNC stage and checksum generation.
 */
#define	ZIO_WRITE_COMMON_STAGES	\
	(ZIO_INTERLOCK_STAGES | ZIO_VDEV_IO_STAGES | \
	ZIO_STAGE_ISSUE_ASYNC | ZIO_STAGE_CHECKSUM_GENERATE)

/* Physical write: the common write stages with nothing added. */
#define	ZIO_WRITE_PHYS_PIPELINE	(ZIO_WRITE_COMMON_STAGES)

/*
 * Rewrite: the common write stages plus write BP initialization,
 * compression and encryption.  Unlike ZIO_WRITE_PIPELINE it has no DVA
 * throttle or DVA allocate stage.
 */
#define	ZIO_REWRITE_PIPELINE	\
	(ZIO_WRITE_COMMON_STAGES | ZIO_STAGE_WRITE_BP_INIT | \
	ZIO_STAGE_WRITE_COMPRESS | ZIO_STAGE_ENCRYPT)

/*
 * Write: the common write stages plus write BP initialization,
 * compression, encryption, DVA throttle and DVA allocate.
 */
#define	ZIO_WRITE_PIPELINE	\
	(ZIO_WRITE_COMMON_STAGES | ZIO_STAGE_WRITE_BP_INIT | \
	ZIO_STAGE_WRITE_COMPRESS | ZIO_STAGE_ENCRYPT | \
	ZIO_STAGE_DVA_THROTTLE | ZIO_STAGE_DVA_ALLOCATE)

/*
 * DDT child write: the interlock stages, the vdev I/O stages and the DVA
 * throttle/allocate stages.  It has no BP initialization, compression,
 * encryption or checksum generation stage.
 */
#define	ZIO_DDT_CHILD_WRITE_PIPELINE	\
	(ZIO_INTERLOCK_STAGES | ZIO_VDEV_IO_STAGES | \
	ZIO_STAGE_DVA_THROTTLE | ZIO_STAGE_DVA_ALLOCATE)

/*
 * DDT write: the interlock stages, write BP initialization, ISSUE_ASYNC,
 * compression, encryption, checksum generation and the DDT write stage.
 * It has neither vdev I/O stages nor DVA stages.
 */
#define	ZIO_DDT_WRITE_PIPELINE	\
	(ZIO_INTERLOCK_STAGES | ZIO_STAGE_WRITE_BP_INIT | \
	ZIO_STAGE_ISSUE_ASYNC | ZIO_STAGE_WRITE_COMPRESS | \
	ZIO_STAGE_ENCRYPT | ZIO_STAGE_CHECKSUM_GENERATE | \
	ZIO_STAGE_DDT_WRITE)
241
/* The two gang stages: assemble and issue. */
#define	ZIO_GANG_STAGES	(ZIO_STAGE_GANG_ASSEMBLE | ZIO_STAGE_GANG_ISSUE)
245
/*
 * Free: the interlock stages, free BP initialization, the BRT free stage
 * and the DVA free stage.
 */
#define	ZIO_FREE_PIPELINE	\
	(ZIO_INTERLOCK_STAGES | ZIO_STAGE_FREE_BP_INIT | \
	ZIO_STAGE_BRT_FREE | ZIO_STAGE_DVA_FREE)

/*
 * DDT free: the interlock stages, free BP initialization, ISSUE_ASYNC and
 * the DDT free stage.  It has neither the BRT free nor the DVA free stage.
 */
#define	ZIO_DDT_FREE_PIPELINE	\
	(ZIO_INTERLOCK_STAGES | ZIO_STAGE_FREE_BP_INIT | \
	ZIO_STAGE_ISSUE_ASYNC | ZIO_STAGE_DDT_FREE)
257
/* Claim: the interlock stages plus the DVA claim stage. */
#define	ZIO_CLAIM_PIPELINE	(ZIO_STAGE_DVA_CLAIM | ZIO_INTERLOCK_STAGES)

/* Flush: the interlock stages plus the vdev I/O stages. */
#define	ZIO_FLUSH_PIPELINE	(ZIO_VDEV_IO_STAGES | ZIO_INTERLOCK_STAGES)

/* Trim: the flush stage set with the ISSUE_ASYNC stage added. */
#define	ZIO_TRIM_PIPELINE	\
	(ZIO_STAGE_ISSUE_ASYNC | ZIO_VDEV_IO_STAGES | ZIO_INTERLOCK_STAGES)
270
/*
 * The DVA allocate, DVA claim and vdev I/O start stages.
 * NOTE(review): the name suggests these are the stages that may block;
 * confirm against the code that tests this mask.
 */
#define	ZIO_BLOCKING_STAGES	\
	(ZIO_STAGE_DVA_ALLOCATE | ZIO_STAGE_DVA_CLAIM | \
	ZIO_STAGE_VDEV_IO_START)
275
/*
 * Prototypes for the zio injection init/fini routines.  Both take no
 * arguments and return nothing; their definitions are not in this header.
 * NOTE(review): presumably these set up and tear down the injection
 * state -- confirm against the definitions.
 */
void zio_inject_init(void);
void zio_inject_fini(void);
278
279#ifdef	__cplusplus
280}
281#endif
282
283#endif	/* _ZIO_IMPL_H */
284