1/*
2 * CDDL HEADER START
3 *
4 * The contents of this file are subject to the terms of the
5 * Common Development and Distribution License (the "License").
6 * You may not use this file except in compliance with the License.
7 *
8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9 * or https://opensource.org/licenses/CDDL-1.0.
10 * See the License for the specific language governing permissions
11 * and limitations under the License.
12 *
13 * When distributing Covered Code, include this CDDL HEADER in each
14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15 * If applicable, add the following below this CDDL HEADER, with the
16 * fields enclosed by brackets "[]" replaced with your own identifying
17 * information: Portions Copyright [yyyy] [name of copyright owner]
18 *
19 * CDDL HEADER END
20 */
21/*
22 * Copyright 2009 Sun Microsystems, Inc.  All rights reserved.
23 * Use is subject to license terms.
24 */
25
26/*
27 * Copyright (c) 2013, 2019 by Delphix. All rights reserved.
28 */
29
30#ifndef _SYS_RANGE_TREE_H
31#define	_SYS_RANGE_TREE_H
32
33#include <sys/btree.h>
34#include <sys/dmu.h>
35
36#ifdef	__cplusplus
37extern "C" {
38#endif
39
/*
 * Number of buckets in range_tree_t's rt_histogram.  Bucket i counts ranges
 * whose size lies in [2^i, 2^(i+1)), so 64 buckets span every power of two
 * that fits in a uint64_t.
 */
#define	RANGE_TREE_HISTOGRAM_SIZE	64

/*
 * Declared ahead of its definition because range_tree_t holds a pointer to
 * it, while the callbacks it contains take a range_tree_t in turn.
 */
typedef struct range_tree_ops range_tree_ops_t;
43
/*
 * Selects which segment structure a range tree stores.  The rs_get_*_raw()
 * and rs_set_*_raw() accessors in this header switch on this value to pick
 * the matching cast.
 */
typedef enum range_seg_type {
	RANGE_SEG32,		/* segments are range_seg32_t */
	RANGE_SEG64,		/* segments are range_seg64_t */
	RANGE_SEG_GAP,		/* segments are range_seg_gap_t */
	RANGE_SEG_NUM_TYPES,	/* count of the types above; not a layout */
} range_seg_type_t;
50
/*
 * In-core state of one range tree.
 *
 * Note: the range_tree may not be accessed concurrently; consumers
 * must provide external locking if required.
 */
typedef struct range_tree {
	zfs_btree_t	rt_root;	/* offset-ordered segment b-tree */
	uint64_t	rt_space;	/* sum of all segments in the map */
	range_seg_type_t rt_type;	/* type of range_seg_t in use */
	/*
	 * All data that is stored in the range tree must have a start higher
	 * than or equal to rt_start, and all sizes and offsets must be
	 * multiples of 1 << rt_shift.
	 *
	 * The accessors in this header store (offset - rt_start) >> rt_shift
	 * in each segment and undo that transformation when reading.
	 */
	uint8_t		rt_shift;	/* log2 of the segment granularity */
	uint64_t	rt_start;	/* base offset subtracted on store */
	/*
	 * Callback table and opaque argument.
	 * NOTE(review): presumably the ops/arg handed to range_tree_create();
	 * confirm against the implementation.
	 */
	const range_tree_ops_t *rt_ops;
	void		*rt_arg;
	uint64_t	rt_gap;		/* allowable inter-segment gap */

	/*
	 * The rt_histogram maintains a histogram of ranges. Each bucket,
	 * rt_histogram[i], contains the number of ranges whose size is:
	 * 2^i <= size of range in bytes < 2^(i+1)
	 */
	uint64_t	rt_histogram[RANGE_TREE_HISTOGRAM_SIZE];
} range_tree_t;
77
/*
 * Compact segment layout used when rt_type is RANGE_SEG32.  The fields hold
 * raw values: rs_set_start()/rs_set_end() subtract rt_start and shift right
 * by rt_shift before storing, and the raw setters assert that the result
 * fits in 32 bits.  There is no fill field; rs_get_fill_raw() reports
 * rs_end - rs_start for this layout.
 */
typedef struct range_seg32 {
	uint32_t	rs_start;	/* starting offset of this segment */
	uint32_t	rs_end;		/* ending offset (non-inclusive) */
} range_seg32_t;
82
/*
 * Segment layout used when rt_type is RANGE_SEG64.
 *
 * Extremely large metaslabs, vdev-wide trees, and dnode-wide trees may
 * require 64-bit integers for ranges.
 *
 * As with range_seg32_t, the fields hold raw (rebased and shifted) values
 * and the fill is derived as rs_end - rs_start.
 */
typedef struct range_seg64 {
	uint64_t	rs_start;	/* starting offset of this segment */
	uint64_t	rs_end;		/* ending offset (non-inclusive) */
} range_seg64_t;
91
/*
 * Segment layout used when rt_type is RANGE_SEG_GAP.  Unlike the 32- and
 * 64-bit layouts it stores the fill explicitly instead of deriving it from
 * rs_end - rs_start.
 */
typedef struct range_seg_gap {
	uint64_t	rs_start;	/* starting offset of this segment */
	uint64_t	rs_end;		/* ending offset (non-inclusive) */
	uint64_t	rs_fill;	/* actual fill if gap mode is on */
} range_seg_gap_t;
97
/*
 * This type needs to be the largest of the range segs, since it will be stack
 * allocated and then cast to the actual type to do tree operations.
 * range_seg_gap_t qualifies because it has every field of range_seg64_t plus
 * rs_fill.
 */
typedef range_seg_gap_t range_seg_max_t;
103
/*
 * This is just for clarity of code purposes, so we can make it clear that a
 * pointer is to a range seg of some type; when we need to do the actual math,
 * we'll figure out the real type.
 *
 * Because the alias is void, a range_seg_t can only be used through a
 * pointer; the rs_get_*() and rs_set_*() helpers in this header cast it to
 * the layout selected by the owning tree's rt_type.
 */
typedef void range_seg_t;
110
/*
 * Table of callbacks attached to a range tree through rt_ops.  Every hook
 * receives the tree and an opaque arg; rtop_add and rtop_remove additionally
 * receive a segment pointer (rs).
 *
 * NOTE(review): the call sites live in the implementation, not in this
 * header -- presumably arg is the tree's rt_arg and each hook fires on the
 * event its name suggests; confirm there.
 */
struct range_tree_ops {
	void    (*rtop_create)(range_tree_t *rt, void *arg);
	void    (*rtop_destroy)(range_tree_t *rt, void *arg);
	void	(*rtop_add)(range_tree_t *rt, void *rs, void *arg);
	void    (*rtop_remove)(range_tree_t *rt, void *rs, void *arg);
	void	(*rtop_vacate)(range_tree_t *rt, void *arg);
};
118
119static inline uint64_t
120rs_get_start_raw(const range_seg_t *rs, const range_tree_t *rt)
121{
122	ASSERT3U(rt->rt_type, <=, RANGE_SEG_NUM_TYPES);
123	switch (rt->rt_type) {
124	case RANGE_SEG32:
125		return (((const range_seg32_t *)rs)->rs_start);
126	case RANGE_SEG64:
127		return (((const range_seg64_t *)rs)->rs_start);
128	case RANGE_SEG_GAP:
129		return (((const range_seg_gap_t *)rs)->rs_start);
130	default:
131		VERIFY(0);
132		return (0);
133	}
134}
135
136static inline uint64_t
137rs_get_end_raw(const range_seg_t *rs, const range_tree_t *rt)
138{
139	ASSERT3U(rt->rt_type, <=, RANGE_SEG_NUM_TYPES);
140	switch (rt->rt_type) {
141	case RANGE_SEG32:
142		return (((const range_seg32_t *)rs)->rs_end);
143	case RANGE_SEG64:
144		return (((const range_seg64_t *)rs)->rs_end);
145	case RANGE_SEG_GAP:
146		return (((const range_seg_gap_t *)rs)->rs_end);
147	default:
148		VERIFY(0);
149		return (0);
150	}
151}
152
153static inline uint64_t
154rs_get_fill_raw(const range_seg_t *rs, const range_tree_t *rt)
155{
156	ASSERT3U(rt->rt_type, <=, RANGE_SEG_NUM_TYPES);
157	switch (rt->rt_type) {
158	case RANGE_SEG32: {
159		const range_seg32_t *r32 = (const range_seg32_t *)rs;
160		return (r32->rs_end - r32->rs_start);
161	}
162	case RANGE_SEG64: {
163		const range_seg64_t *r64 = (const range_seg64_t *)rs;
164		return (r64->rs_end - r64->rs_start);
165	}
166	case RANGE_SEG_GAP:
167		return (((const range_seg_gap_t *)rs)->rs_fill);
168	default:
169		VERIFY(0);
170		return (0);
171	}
172
173}
174
175static inline uint64_t
176rs_get_start(const range_seg_t *rs, const range_tree_t *rt)
177{
178	return ((rs_get_start_raw(rs, rt) << rt->rt_shift) + rt->rt_start);
179}
180
181static inline uint64_t
182rs_get_end(const range_seg_t *rs, const range_tree_t *rt)
183{
184	return ((rs_get_end_raw(rs, rt) << rt->rt_shift) + rt->rt_start);
185}
186
187static inline uint64_t
188rs_get_fill(const range_seg_t *rs, const range_tree_t *rt)
189{
190	return (rs_get_fill_raw(rs, rt) << rt->rt_shift);
191}
192
193static inline void
194rs_set_start_raw(range_seg_t *rs, range_tree_t *rt, uint64_t start)
195{
196	ASSERT3U(rt->rt_type, <=, RANGE_SEG_NUM_TYPES);
197	switch (rt->rt_type) {
198	case RANGE_SEG32:
199		ASSERT3U(start, <=, UINT32_MAX);
200		((range_seg32_t *)rs)->rs_start = (uint32_t)start;
201		break;
202	case RANGE_SEG64:
203		((range_seg64_t *)rs)->rs_start = start;
204		break;
205	case RANGE_SEG_GAP:
206		((range_seg_gap_t *)rs)->rs_start = start;
207		break;
208	default:
209		VERIFY(0);
210	}
211}
212
213static inline void
214rs_set_end_raw(range_seg_t *rs, range_tree_t *rt, uint64_t end)
215{
216	ASSERT3U(rt->rt_type, <=, RANGE_SEG_NUM_TYPES);
217	switch (rt->rt_type) {
218	case RANGE_SEG32:
219		ASSERT3U(end, <=, UINT32_MAX);
220		((range_seg32_t *)rs)->rs_end = (uint32_t)end;
221		break;
222	case RANGE_SEG64:
223		((range_seg64_t *)rs)->rs_end = end;
224		break;
225	case RANGE_SEG_GAP:
226		((range_seg_gap_t *)rs)->rs_end = end;
227		break;
228	default:
229		VERIFY(0);
230	}
231}
232
233static inline void
234rs_set_fill_raw(range_seg_t *rs, range_tree_t *rt, uint64_t fill)
235{
236	ASSERT3U(rt->rt_type, <=, RANGE_SEG_NUM_TYPES);
237	switch (rt->rt_type) {
238	case RANGE_SEG32:
239		/* fall through */
240	case RANGE_SEG64:
241		ASSERT3U(fill, ==, rs_get_end_raw(rs, rt) - rs_get_start_raw(rs,
242		    rt));
243		break;
244	case RANGE_SEG_GAP:
245		((range_seg_gap_t *)rs)->rs_fill = fill;
246		break;
247	default:
248		VERIFY(0);
249	}
250}
251
252static inline void
253rs_set_start(range_seg_t *rs, range_tree_t *rt, uint64_t start)
254{
255	ASSERT3U(start, >=, rt->rt_start);
256	ASSERT(IS_P2ALIGNED(start, 1ULL << rt->rt_shift));
257	rs_set_start_raw(rs, rt, (start - rt->rt_start) >> rt->rt_shift);
258}
259
260static inline void
261rs_set_end(range_seg_t *rs, range_tree_t *rt, uint64_t end)
262{
263	ASSERT3U(end, >=, rt->rt_start);
264	ASSERT(IS_P2ALIGNED(end, 1ULL << rt->rt_shift));
265	rs_set_end_raw(rs, rt, (end - rt->rt_start) >> rt->rt_shift);
266}
267
268static inline void
269rs_set_fill(range_seg_t *rs, range_tree_t *rt, uint64_t fill)
270{
271	ASSERT(IS_P2ALIGNED(fill, 1ULL << rt->rt_shift));
272	rs_set_fill_raw(rs, rt, fill >> rt->rt_shift);
273}
274
/*
 * Callback signature accepted by range_tree_vacate() and range_tree_walk():
 * an opaque arg followed by a start and a size.  range_tree_add() and
 * range_tree_remove() have this same signature.
 */
typedef void range_tree_func_t(void *arg, uint64_t start, uint64_t size);
276
/*
 * Creation, destruction and query interface.  Only the prototypes are
 * visible in this header; behaviour is defined by the implementation.
 *
 * range_tree_create() takes the same parameters as range_tree_create_gap()
 * minus the trailing gap argument.
 *
 * NOTE(review): shift is declared uint64_t here while range_tree_t stores
 * rt_shift in a uint8_t -- confirm the implementation range-checks it.
 */
range_tree_t *range_tree_create_gap(const range_tree_ops_t *ops,
    range_seg_type_t type, void *arg, uint64_t start, uint64_t shift,
    uint64_t gap);
range_tree_t *range_tree_create(const range_tree_ops_t *ops,
    range_seg_type_t type, void *arg, uint64_t start, uint64_t shift);
void range_tree_destroy(range_tree_t *rt);
boolean_t range_tree_contains(range_tree_t *rt, uint64_t start, uint64_t size);
range_seg_t *range_tree_find(range_tree_t *rt, uint64_t start, uint64_t size);
boolean_t range_tree_find_in(range_tree_t *rt, uint64_t start, uint64_t size,
    uint64_t *ostart, uint64_t *osize);
void range_tree_verify_not_present(range_tree_t *rt,
    uint64_t start, uint64_t size);
void range_tree_resize_segment(range_tree_t *rt, range_seg_t *rs,
    uint64_t newstart, uint64_t newsize);
uint64_t range_tree_space(range_tree_t *rt);
uint64_t range_tree_numsegs(range_tree_t *rt);
boolean_t range_tree_is_empty(range_tree_t *rt);
void range_tree_swap(range_tree_t **rtsrc, range_tree_t **rtdst);
void range_tree_stat_verify(range_tree_t *rt);
uint64_t range_tree_min(range_tree_t *rt);
uint64_t range_tree_max(range_tree_t *rt);
uint64_t range_tree_span(range_tree_t *rt);
299
/*
 * Modification interface.
 *
 * range_tree_add() and range_tree_remove() take a void * first argument, so
 * their signatures match range_tree_func_t and they can be passed wherever
 * such a callback is expected.
 * NOTE(review): presumably arg is the range_tree_t to modify -- confirm
 * against the implementation.
 *
 * range_tree_adjust_fill() takes a signed delta, unlike the unsigned sizes
 * used by the other functions in this group.
 */
void range_tree_add(void *arg, uint64_t start, uint64_t size);
void range_tree_remove(void *arg, uint64_t start, uint64_t size);
void range_tree_remove_fill(range_tree_t *rt, uint64_t start, uint64_t size);
void range_tree_adjust_fill(range_tree_t *rt, range_seg_t *rs, int64_t delta);
void range_tree_clear(range_tree_t *rt, uint64_t start, uint64_t size);
305
/*
 * Traversal interface.  range_tree_vacate() and range_tree_walk() both accept
 * a range_tree_func_t callback together with an opaque arg.
 * NOTE(review): presumably func is invoked once per segment with that arg;
 * confirm against the implementation.
 */
void range_tree_vacate(range_tree_t *rt, range_tree_func_t *func, void *arg);
void range_tree_walk(range_tree_t *rt, range_tree_func_t *func, void *arg);
range_seg_t *range_tree_first(range_tree_t *rt);
309
/*
 * Helpers that operate on a pair of trees, removefrom and addto.  The
 * _segment variant takes a single [start, end) pair as its input, while the
 * other variant takes a whole tree, rt.
 * NOTE(review): the exact remove-or-add rule is defined by the
 * implementation and is not visible from this header.
 */
void range_tree_remove_xor_add_segment(uint64_t start, uint64_t end,
    range_tree_t *removefrom, range_tree_t *addto);
void range_tree_remove_xor_add(range_tree_t *rt, range_tree_t *removefrom,
    range_tree_t *addto);
314
315#ifdef	__cplusplus
316}
317#endif
318
319#endif	/* _SYS_RANGE_TREE_H */
320