/*
 * CDDL HEADER START
 *
 * The contents of this file are subject to the terms of the
 * Common Development and Distribution License (the "License").
 * You may not use this file except in compliance with the License.
 *
 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
 * or https://opensource.org/licenses/CDDL-1.0.
 * See the License for the specific language governing permissions
 * and limitations under the License.
 *
 * When distributing Covered Code, include this CDDL HEADER in each
 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
 * If applicable, add the following below this CDDL HEADER, with the
 * fields enclosed by brackets "[]" replaced with your own identifying
 * information: Portions Copyright [yyyy] [name of copyright owner]
 *
 * CDDL HEADER END
 */
/*
 * Copyright (C) 2016 Gvozden Neskovic <neskovic@compeng.uni-frankfurt.de>.
 */

25#ifndef _SYS_VDEV_RAIDZ_H
26#define	_SYS_VDEV_RAIDZ_H
27
28#include <sys/types.h>
29#include <sys/zfs_rlock.h>
30
31#ifdef	__cplusplus
32extern "C" {
33#endif
34
/*
 * Forward declarations, so that declarations in this header can name
 * pointers to these structures without needing their full definitions.
 */
struct zio;
struct raidz_col;
struct raidz_row;
struct raidz_map;
struct vdev_raidz;
struct uberblock;
#if !defined(_KERNEL)
/*
 * Placeholder definition used when _KERNEL is not defined.  Note that a
 * struct with no members is a GNU C extension rather than standard C.
 */
struct kernel_param {};
#endif

45/*
46 * vdev_raidz interface
47 */
48struct raidz_map *vdev_raidz_map_alloc(struct zio *, uint64_t, uint64_t,
49    uint64_t);
50struct raidz_map *vdev_raidz_map_alloc_expanded(struct zio *,
51    uint64_t, uint64_t, uint64_t, uint64_t, uint64_t, uint64_t, boolean_t);
52void vdev_raidz_map_free(struct raidz_map *);
53void vdev_raidz_free(struct vdev_raidz *);
54void vdev_raidz_generate_parity_row(struct raidz_map *, struct raidz_row *);
55void vdev_raidz_generate_parity(struct raidz_map *);
56void vdev_raidz_reconstruct(struct raidz_map *, const int *, int);
57void vdev_raidz_child_done(zio_t *);
58void vdev_raidz_io_done(zio_t *);
59void vdev_raidz_checksum_error(zio_t *, struct raidz_col *, abd_t *);
60struct raidz_row *vdev_raidz_row_alloc(int);
61void vdev_raidz_reflow_copy_scratch(spa_t *);
62void raidz_dtl_reassessed(vdev_t *);
63
64extern const zio_vsd_ops_t vdev_raidz_vsd_ops;
65
/*
 * vdev_raidz_math interface
 *
 * struct raidz_impl_ops is only used here as a pointer target, so it is
 * left as an incomplete type in this header.
 */
void vdev_raidz_math_init(void);
void vdev_raidz_math_fini(void);
const struct raidz_impl_ops *vdev_raidz_math_get_ops(void);
int vdev_raidz_math_generate(struct raidz_map *, struct raidz_row *);
int vdev_raidz_math_reconstruct(struct raidz_map *, struct raidz_row *,
    const int *, const int *, const int);
int vdev_raidz_impl_set(const char *);

77typedef struct vdev_raidz_expand {
78	uint64_t vre_vdev_id;
79
80	kmutex_t vre_lock;
81	kcondvar_t vre_cv;
82
83	/*
84	 * How much i/o is outstanding (issued and not completed).
85	 */
86	uint64_t vre_outstanding_bytes;
87
88	/*
89	 * Next offset to issue i/o for.
90	 */
91	uint64_t vre_offset;
92
93	/*
94	 * Lowest offset of a failed expansion i/o.  The expansion will retry
95	 * from here.  Once the expansion thread notices the failure and exits,
96	 * vre_failed_offset is reset back to UINT64_MAX, and
97	 * vre_waiting_for_resilver will be set.
98	 */
99	uint64_t vre_failed_offset;
100	boolean_t vre_waiting_for_resilver;
101
102	/*
103	 * Offset that is completing each txg
104	 */
105	uint64_t vre_offset_pertxg[TXG_SIZE];
106
107	/*
108	 * Bytes copied in each txg.
109	 */
110	uint64_t vre_bytes_copied_pertxg[TXG_SIZE];
111
112	/*
113	 * The rangelock prevents normal read/write zio's from happening while
114	 * there are expansion (reflow) i/os in progress to the same offsets.
115	 */
116	zfs_rangelock_t vre_rangelock;
117
118	/*
119	 * These fields are stored on-disk in the vdev_top_zap:
120	 */
121	dsl_scan_state_t vre_state;
122	uint64_t vre_start_time;
123	uint64_t vre_end_time;
124	uint64_t vre_bytes_copied;
125} vdev_raidz_expand_t;
126
127typedef struct vdev_raidz {
128	/*
129	 * Number of child vdevs when this raidz vdev was created (i.e. before
130	 * any raidz expansions).
131	 */
132	int vd_original_width;
133
134	/*
135	 * The current number of child vdevs, which may be more than the
136	 * original width if an expansion is in progress or has completed.
137	 */
138	int vd_physical_width;
139
140	int vd_nparity;
141
142	/*
143	 * Tree of reflow_node_t's.  The lock protects the avl tree only.
144	 * The reflow_node_t's describe completed expansions, and are used
145	 * to determine the logical width given a block's birth time.
146	 */
147	avl_tree_t vd_expand_txgs;
148	kmutex_t vd_expand_lock;
149
150	/*
151	 * If this vdev is being expanded, spa_raidz_expand is set to this
152	 */
153	vdev_raidz_expand_t vn_vre;
154} vdev_raidz_t;
155
156extern int vdev_raidz_attach_check(vdev_t *);
157extern void vdev_raidz_attach_sync(void *, dmu_tx_t *);
158extern void spa_start_raidz_expansion_thread(spa_t *);
159extern int spa_raidz_expand_get_stats(spa_t *, pool_raidz_expand_stat_t *);
160extern int vdev_raidz_load(vdev_t *);
161
/*
 * RAIDZ scratch area pause points (for testing).
 * The values are consecutive integers starting at 0 (no pause point).
 */
#define	RAIDZ_EXPAND_PAUSE_NONE	0
#define	RAIDZ_EXPAND_PAUSE_PRE_SCRATCH_1 1
#define	RAIDZ_EXPAND_PAUSE_PRE_SCRATCH_2 2
#define	RAIDZ_EXPAND_PAUSE_PRE_SCRATCH_3 3
#define	RAIDZ_EXPAND_PAUSE_SCRATCH_VALID 4
#define	RAIDZ_EXPAND_PAUSE_SCRATCH_REFLOWED 5
#define	RAIDZ_EXPAND_PAUSE_SCRATCH_POST_REFLOW_1 6
#define	RAIDZ_EXPAND_PAUSE_SCRATCH_POST_REFLOW_2 7

172#ifdef	__cplusplus
173}
174#endif
175
176#endif /* _SYS_VDEV_RAIDZ_H */
177