/*
 * CDDL HEADER START
 *
 * The contents of this file are subject to the terms of the
 * Common Development and Distribution License (the "License").
 * You may not use this file except in compliance with the License.
 *
 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
 * or http://www.opensolaris.org/os/licensing.
 * See the License for the specific language governing permissions
 * and limitations under the License.
 *
 * When distributing Covered Code, include this CDDL HEADER in each
 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
 * If applicable, add the following below this CDDL HEADER, with the
 * fields enclosed by brackets "[]" replaced with your own identifying
 * information: Portions Copyright [yyyy] [name of copyright owner]
 *
 * CDDL HEADER END
 */
/*
 * Copyright 2009 Sun Microsystems, Inc.  All rights reserved.
 * Use is subject to license terms.
 */
/*
 * Copyright (c) 2013, 2017 by Delphix. All rights reserved.
 */

#include <sys/zfs_context.h>
#include <sys/spa.h>
#include <sys/dmu.h>
#include <sys/dnode.h>
#include <sys/zio.h>
#include <sys/range_tree.h>

/*
 * Range trees are tree-based data structures that can be used to
 * track free space or generally any space allocation information.
 * A range tree keeps track of individual segments and automatically
 * provides facilities such as adjacent extent merging and extent
 * splitting in response to range add/remove requests.
 *
 * A range tree starts out completely empty, with no segments in it.
 * Adding an allocation via range_tree_add to the range tree can either:
 * 1) create a new extent
 * 2) extend an adjacent extent
 * 3) merge two adjacent extents
 * Conversely, removing an allocation via range_tree_remove can:
 * 1) completely remove an extent
 * 2) shorten an extent (if the allocation was near one of its ends)
 * 3) split an extent into two extents, in effect punching a hole
 *
 * A range tree is also capable of 'bridging' gaps when adding
 * allocations. This is useful for cases when close proximity of
 * allocations is an important detail that needs to be represented
 * in the range tree. The maximum allowed gap is set when the tree is
 * created via range_tree_create_impl(). The default behavior is not
 * to bridge gaps (i.e. the maximum allowed gap size is 0).
 *
 * In order to traverse a range tree, use either the range_tree_walk()
 * or range_tree_vacate() functions.
 *
 * To obtain more accurate information on individual segment
 * operations that the range tree performs "under the hood", you can
 * specify a set of callbacks by passing a range_tree_ops_t structure
 * to the range_tree_create function. Any callbacks that are non-NULL
 * are then called at the appropriate times.
 *
 * Gap-bridging trees are used by the dsl scanning code to group I/Os
 * into mostly sequential chunks to optimize disk performance. The code
 * here attempts to do this with as little memory and computational
 * overhead as possible. One limitation of this implementation is that
 * gap-bridging range trees only support removing complete segments.
 */
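
/*
 * A minimal usage sketch, assuming no per-segment callbacks and no gap
 * bridging are needed (the offsets are arbitrary; locking is the
 * caller's responsibility):
 *
 *	range_tree_t *rt = range_tree_create(NULL, NULL);
 *
 *	range_tree_add(rt, 0x1000, 0x1000);	creates [0x1000, 0x2000)
 *	range_tree_add(rt, 0x2000, 0x1000);	extends it to [0x1000, 0x3000)
 *	range_tree_remove(rt, 0x1800, 0x400);	splits it, punching a hole
 *
 *	range_tree_vacate(rt, NULL, NULL);	drop any remaining segments
 *	range_tree_destroy(rt);			the tree must be empty here
 */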

kmem_cache_t *range_seg_cache;

/* Generic ops for managing an AVL tree alongside a range tree */
struct range_tree_ops rt_avl_ops = {
	.rtop_create = rt_avl_create,
	.rtop_destroy = rt_avl_destroy,
	.rtop_add = rt_avl_add,
	.rtop_remove = rt_avl_remove,
	.rtop_vacate = rt_avl_vacate,
};
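
/*
 * Sketch of pairing rt_avl_ops with a caller-owned AVL tree; the
 * comparator "my_compare" stands in for any total order over
 * range_seg_t that the caller wants maintained:
 *
 *	avl_tree_t secondary;
 *	range_tree_t *rt = range_tree_create_impl(&rt_avl_ops, &secondary,
 *	    my_compare, 0);
 *
 * rt_avl_create() then initializes "secondary" with rt->rt_avl_compare,
 * and every segment added to or removed from "rt" is mirrored into
 * "secondary" via the rtop_add/rtop_remove callbacks.
 */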

void
range_tree_init(void)
{
	ASSERT(range_seg_cache == NULL);
	range_seg_cache = kmem_cache_create("range_seg_cache",
	    sizeof (range_seg_t), 0, NULL, NULL, NULL, NULL, NULL, 0);
}

void
range_tree_fini(void)
{
	kmem_cache_destroy(range_seg_cache);
	range_seg_cache = NULL;
}

void
range_tree_stat_verify(range_tree_t *rt)
{
	range_seg_t *rs;
	uint64_t hist[RANGE_TREE_HISTOGRAM_SIZE] = { 0 };
	int i;

	for (rs = avl_first(&rt->rt_root); rs != NULL;
	    rs = AVL_NEXT(&rt->rt_root, rs)) {
		uint64_t size = rs->rs_end - rs->rs_start;
		int idx = highbit64(size) - 1;

		hist[idx]++;
		ASSERT3U(hist[idx], !=, 0);
	}

	for (i = 0; i < RANGE_TREE_HISTOGRAM_SIZE; i++) {
		if (hist[i] != rt->rt_histogram[i]) {
			zfs_dbgmsg("i=%d, hist=%p, hist[i]=%llu, "
			    "rt_hist[i]=%llu",
			    i, hist, hist[i], rt->rt_histogram[i]);
		}
		VERIFY3U(hist[i], ==, rt->rt_histogram[i]);
	}
}

static void
range_tree_stat_incr(range_tree_t *rt, range_seg_t *rs)
{
	uint64_t size = rs->rs_end - rs->rs_start;
	int idx = highbit64(size) - 1;

	ASSERT(size != 0);
	ASSERT3U(idx, <,
	    sizeof (rt->rt_histogram) / sizeof (*rt->rt_histogram));

	rt->rt_histogram[idx]++;
	ASSERT3U(rt->rt_histogram[idx], !=, 0);
}

static void
range_tree_stat_decr(range_tree_t *rt, range_seg_t *rs)
{
	uint64_t size = rs->rs_end - rs->rs_start;
	int idx = highbit64(size) - 1;

	ASSERT(size != 0);
	ASSERT3U(idx, <,
	    sizeof (rt->rt_histogram) / sizeof (*rt->rt_histogram));

	ASSERT3U(rt->rt_histogram[idx], !=, 0);
	rt->rt_histogram[idx]--;
}

/*
 * NOTE: caller is responsible for all locking.
 */
static int
range_tree_seg_compare(const void *x1, const void *x2)
{
	const range_seg_t *r1 = (const range_seg_t *)x1;
	const range_seg_t *r2 = (const range_seg_t *)x2;

	ASSERT3U(r1->rs_start, <=, r1->rs_end);
	ASSERT3U(r2->rs_start, <=, r2->rs_end);

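	/*
	 * Overlapping segments compare equal, so an avl_find() for any
	 * range that intersects an existing segment returns that segment.
	 */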
	return ((r1->rs_start >= r2->rs_end) - (r1->rs_end <= r2->rs_start));
}

range_tree_t *
range_tree_create_impl(range_tree_ops_t *ops, void *arg,
    int (*avl_compare) (const void *, const void *), uint64_t gap)
{
	range_tree_t *rt = kmem_zalloc(sizeof (range_tree_t), KM_SLEEP);

	avl_create(&rt->rt_root, range_tree_seg_compare,
	    sizeof (range_seg_t), offsetof(range_seg_t, rs_node));

	rt->rt_ops = ops;
	rt->rt_arg = arg;
	rt->rt_gap = gap;
	rt->rt_avl_compare = avl_compare;

	if (rt->rt_ops != NULL && rt->rt_ops->rtop_create != NULL)
		rt->rt_ops->rtop_create(rt, rt->rt_arg);

	return (rt);
}

range_tree_t *
range_tree_create(range_tree_ops_t *ops, void *arg)
{
	return (range_tree_create_impl(ops, arg, NULL, 0));
}

void
range_tree_destroy(range_tree_t *rt)
{
	VERIFY0(rt->rt_space);

	if (rt->rt_ops != NULL && rt->rt_ops->rtop_destroy != NULL)
		rt->rt_ops->rtop_destroy(rt, rt->rt_arg);

	avl_destroy(&rt->rt_root);
	kmem_free(rt, sizeof (*rt));
}

void
range_tree_adjust_fill(range_tree_t *rt, range_seg_t *rs, int64_t delta)
{
	ASSERT3U(rs->rs_fill + delta, !=, 0);
	ASSERT3U(rs->rs_fill + delta, <=, rs->rs_end - rs->rs_start);

	if (rt->rt_ops != NULL && rt->rt_ops->rtop_remove != NULL)
		rt->rt_ops->rtop_remove(rt, rs, rt->rt_arg);
	rs->rs_fill += delta;
	if (rt->rt_ops != NULL && rt->rt_ops->rtop_add != NULL)
		rt->rt_ops->rtop_add(rt, rs, rt->rt_arg);
}

static void
range_tree_add_impl(void *arg, uint64_t start, uint64_t size, uint64_t fill)
{
	range_tree_t *rt = arg;
	avl_index_t where;
	range_seg_t rsearch, *rs_before, *rs_after, *rs;
	uint64_t end = start + size, gap = rt->rt_gap;
	uint64_t bridge_size = 0;
	boolean_t merge_before, merge_after;

	ASSERT3U(size, !=, 0);
	ASSERT3U(fill, <=, size);

	rsearch.rs_start = start;
	rsearch.rs_end = end;
	rs = avl_find(&rt->rt_root, &rsearch, &where);

	if (gap == 0 && rs != NULL &&
	    rs->rs_start <= start && rs->rs_end >= end) {
		zfs_panic_recover("zfs: allocating allocated segment "
		    "(offset=%llu size=%llu) of (offset=%llu size=%llu)\n",
		    (longlong_t)start, (longlong_t)size,
		    (longlong_t)rs->rs_start,
		    (longlong_t)rs->rs_end - rs->rs_start);
		return;
	}

	/*
	 * If this is a gap-supporting range tree, it is possible that we
	 * are inserting into an existing segment. In this case simply
	 * bump the fill count and call the remove / add callbacks. If the
	 * new range will extend an existing segment, we remove the
	 * existing one, apply the new extent to it and re-insert it using
	 * the normal code paths.
	 */
	if (rs != NULL) {
		ASSERT3U(gap, !=, 0);
		if (rs->rs_start <= start && rs->rs_end >= end) {
			range_tree_adjust_fill(rt, rs, fill);
			return;
		}

		avl_remove(&rt->rt_root, rs);
		if (rt->rt_ops != NULL && rt->rt_ops->rtop_remove != NULL)
			rt->rt_ops->rtop_remove(rt, rs, rt->rt_arg);

		range_tree_stat_decr(rt, rs);
		rt->rt_space -= rs->rs_end - rs->rs_start;

		fill += rs->rs_fill;
		start = MIN(start, rs->rs_start);
		end = MAX(end, rs->rs_end);
		size = end - start;

		range_tree_add_impl(rt, start, size, fill);

		kmem_cache_free(range_seg_cache, rs);
		return;
	}

	ASSERT3P(rs, ==, NULL);

	/*
	 * Determine whether or not we will have to merge with our neighbors.
	 * If gap != 0, we might need to merge with our neighbors even if we
	 * aren't directly touching.
	 */
	rs_before = avl_nearest(&rt->rt_root, where, AVL_BEFORE);
	rs_after = avl_nearest(&rt->rt_root, where, AVL_AFTER);

	merge_before = (rs_before != NULL && rs_before->rs_end >= start - gap);
	merge_after = (rs_after != NULL && rs_after->rs_start <= end + gap);

	if (merge_before && gap != 0)
		bridge_size += start - rs_before->rs_end;
	if (merge_after && gap != 0)
		bridge_size += rs_after->rs_start - end;

	if (merge_before && merge_after) {
		avl_remove(&rt->rt_root, rs_before);
		if (rt->rt_ops != NULL && rt->rt_ops->rtop_remove != NULL) {
			rt->rt_ops->rtop_remove(rt, rs_before, rt->rt_arg);
			rt->rt_ops->rtop_remove(rt, rs_after, rt->rt_arg);
		}

		range_tree_stat_decr(rt, rs_before);
		range_tree_stat_decr(rt, rs_after);

		rs_after->rs_fill += rs_before->rs_fill + fill;
		rs_after->rs_start = rs_before->rs_start;
		kmem_cache_free(range_seg_cache, rs_before);
		rs = rs_after;
	} else if (merge_before) {
		if (rt->rt_ops != NULL && rt->rt_ops->rtop_remove != NULL)
			rt->rt_ops->rtop_remove(rt, rs_before, rt->rt_arg);

		range_tree_stat_decr(rt, rs_before);

		rs_before->rs_fill += fill;
		rs_before->rs_end = end;
		rs = rs_before;
	} else if (merge_after) {
		if (rt->rt_ops != NULL && rt->rt_ops->rtop_remove != NULL)
			rt->rt_ops->rtop_remove(rt, rs_after, rt->rt_arg);

		range_tree_stat_decr(rt, rs_after);

		rs_after->rs_fill += fill;
		rs_after->rs_start = start;
		rs = rs_after;
	} else {
		rs = kmem_cache_alloc(range_seg_cache, KM_SLEEP);

		rs->rs_fill = fill;
		rs->rs_start = start;
		rs->rs_end = end;
		avl_insert(&rt->rt_root, rs, where);
	}

	if (gap != 0)
		ASSERT3U(rs->rs_fill, <=, rs->rs_end - rs->rs_start);
	else
		ASSERT3U(rs->rs_fill, ==, rs->rs_end - rs->rs_start);

	if (rt->rt_ops != NULL && rt->rt_ops->rtop_add != NULL)
		rt->rt_ops->rtop_add(rt, rs, rt->rt_arg);

	range_tree_stat_incr(rt, rs);
	rt->rt_space += size + bridge_size;
}

void
range_tree_add(void *arg, uint64_t start, uint64_t size)
{
	range_tree_add_impl(arg, start, size, size);
}
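
/*
 * A sketch of gap bridging, assuming a tree created with
 * range_tree_create_impl(..., gap = 0x100):
 *
 *	range_tree_add(rt, 0x0, 0x80);		creates [0x0, 0x80), fill 0x80
 *	range_tree_add(rt, 0x100, 0x80);	starts within the 0x100 gap,
 *						so the two merge into
 *						[0x0, 0x180) with fill 0x100;
 *						the 0x80 bridge is counted
 *						in rt_space as well
 */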

static void
range_tree_remove_impl(range_tree_t *rt, uint64_t start, uint64_t size,
    boolean_t do_fill)
{
	avl_index_t where;
	range_seg_t rsearch, *rs, *newseg;
	uint64_t end = start + size;
	boolean_t left_over, right_over;

	VERIFY3U(size, !=, 0);
	VERIFY3U(size, <=, rt->rt_space);

	rsearch.rs_start = start;
	rsearch.rs_end = end;
	rs = avl_find(&rt->rt_root, &rsearch, &where);

	/* Make sure we completely overlap with someone */
	if (rs == NULL) {
		zfs_panic_recover("zfs: freeing free segment "
		    "(offset=%llu size=%llu)",
		    (longlong_t)start, (longlong_t)size);
		return;
	}

	/*
	 * Range trees with gap support must only remove complete segments
	 * from the tree. This allows us to maintain accurate fill accounting
	 * and to ensure that bridged sections are not leaked. If we need to
	 * remove less than the full segment, we can only adjust the fill count.
	 */
	if (rt->rt_gap != 0) {
		if (do_fill) {
			if (rs->rs_fill == size) {
				start = rs->rs_start;
				end = rs->rs_end;
				size = end - start;
			} else {
				range_tree_adjust_fill(rt, rs, -size);
				return;
			}
		} else if (rs->rs_start != start || rs->rs_end != end) {
			zfs_panic_recover("zfs: freeing partial segment of "
			    "gap tree (offset=%llu size=%llu) of "
			    "(offset=%llu size=%llu)",
			    (longlong_t)start, (longlong_t)size,
			    (longlong_t)rs->rs_start,
			    (longlong_t)rs->rs_end - rs->rs_start);
			return;
		}
	}

	VERIFY3U(rs->rs_start, <=, start);
	VERIFY3U(rs->rs_end, >=, end);

	left_over = (rs->rs_start != start);
	right_over = (rs->rs_end != end);

	range_tree_stat_decr(rt, rs);

	if (rt->rt_ops != NULL && rt->rt_ops->rtop_remove != NULL)
		rt->rt_ops->rtop_remove(rt, rs, rt->rt_arg);

	if (left_over && right_over) {
		newseg = kmem_cache_alloc(range_seg_cache, KM_SLEEP);
		newseg->rs_start = end;
		newseg->rs_end = rs->rs_end;
		newseg->rs_fill = newseg->rs_end - newseg->rs_start;
		range_tree_stat_incr(rt, newseg);

		rs->rs_end = start;

		avl_insert_here(&rt->rt_root, newseg, rs, AVL_AFTER);
		if (rt->rt_ops != NULL && rt->rt_ops->rtop_add != NULL)
			rt->rt_ops->rtop_add(rt, newseg, rt->rt_arg);
	} else if (left_over) {
		rs->rs_end = start;
	} else if (right_over) {
		rs->rs_start = end;
	} else {
		avl_remove(&rt->rt_root, rs);
		kmem_cache_free(range_seg_cache, rs);
		rs = NULL;
	}

	if (rs != NULL) {
		/*
		 * The fill of the leftover segment will always be equal to
		 * the size, since we do not support removing partial segments
		 * of range trees with gaps.
		 */
		rs->rs_fill = rs->rs_end - rs->rs_start;
		range_tree_stat_incr(rt, rs);

		if (rt->rt_ops != NULL && rt->rt_ops->rtop_add != NULL)
			rt->rt_ops->rtop_add(rt, rs, rt->rt_arg);
	}

	rt->rt_space -= size;
}

void
range_tree_remove(void *arg, uint64_t start, uint64_t size)
{
	range_tree_remove_impl(arg, start, size, B_FALSE);
}

void
range_tree_remove_fill(range_tree_t *rt, uint64_t start, uint64_t size)
{
	range_tree_remove_impl(rt, start, size, B_TRUE);
}

void
range_tree_resize_segment(range_tree_t *rt, range_seg_t *rs,
    uint64_t newstart, uint64_t newsize)
{
	int64_t delta = newsize - (rs->rs_end - rs->rs_start);

	range_tree_stat_decr(rt, rs);
	if (rt->rt_ops != NULL && rt->rt_ops->rtop_remove != NULL)
		rt->rt_ops->rtop_remove(rt, rs, rt->rt_arg);

	rs->rs_start = newstart;
	rs->rs_end = newstart + newsize;

	range_tree_stat_incr(rt, rs);
	if (rt->rt_ops != NULL && rt->rt_ops->rtop_add != NULL)
		rt->rt_ops->rtop_add(rt, rs, rt->rt_arg);

	rt->rt_space += delta;
}

static range_seg_t *
range_tree_find_impl(range_tree_t *rt, uint64_t start, uint64_t size)
{
	avl_index_t where;
	range_seg_t rsearch;
	uint64_t end = start + size;

	VERIFY(size != 0);

	rsearch.rs_start = start;
	rsearch.rs_end = end;
	return (avl_find(&rt->rt_root, &rsearch, &where));
}

range_seg_t *
range_tree_find(range_tree_t *rt, uint64_t start, uint64_t size)
{
	range_seg_t *rs = range_tree_find_impl(rt, start, size);
	if (rs != NULL && rs->rs_start <= start && rs->rs_end >= start + size)
		return (rs);
	return (NULL);
}

void
range_tree_verify(range_tree_t *rt, uint64_t off, uint64_t size)
{
	range_seg_t *rs;

	rs = range_tree_find(rt, off, size);
	if (rs != NULL)
		panic("freeing free block; rs=%p", (void *)rs);
}

boolean_t
range_tree_contains(range_tree_t *rt, uint64_t start, uint64_t size)
{
	return (range_tree_find(rt, start, size) != NULL);
}

/*
 * Ensure that this range is not in the tree, regardless of whether
 * it is currently present in whole, in part, or not at all.
 */
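/*
 * E.g., with segments [0x0, 0x100) and [0x200, 0x300) in a non-gap
 * tree, range_tree_clear(rt, 0x80, 0x200) trims them to [0x0, 0x80)
 * and [0x280, 0x300), skipping the hole in between.
 */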
void
range_tree_clear(range_tree_t *rt, uint64_t start, uint64_t size)
{
	range_seg_t *rs;

	if (size == 0)
		return;

	while ((rs = range_tree_find_impl(rt, start, size)) != NULL) {
		uint64_t free_start = MAX(rs->rs_start, start);
		uint64_t free_end = MIN(rs->rs_end, start + size);
		range_tree_remove(rt, free_start, free_end - free_start);
	}
}

void
range_tree_swap(range_tree_t **rtsrc, range_tree_t **rtdst)
{
	range_tree_t *rt;

	ASSERT0(range_tree_space(*rtdst));
	ASSERT0(avl_numnodes(&(*rtdst)->rt_root));

	rt = *rtsrc;
	*rtsrc = *rtdst;
	*rtdst = rt;
}

void
range_tree_vacate(range_tree_t *rt, range_tree_func_t *func, void *arg)
{
	range_seg_t *rs;
	void *cookie = NULL;

	if (rt->rt_ops != NULL && rt->rt_ops->rtop_vacate != NULL)
		rt->rt_ops->rtop_vacate(rt, rt->rt_arg);

	while ((rs = avl_destroy_nodes(&rt->rt_root, &cookie)) != NULL) {
		if (func != NULL)
			func(arg, rs->rs_start, rs->rs_end - rs->rs_start);
		kmem_cache_free(range_seg_cache, rs);
	}

	bzero(rt->rt_histogram, sizeof (rt->rt_histogram));
	rt->rt_space = 0;
}

void
range_tree_walk(range_tree_t *rt, range_tree_func_t *func, void *arg)
{
	range_seg_t *rs;

	for (rs = avl_first(&rt->rt_root); rs; rs = AVL_NEXT(&rt->rt_root, rs))
		func(arg, rs->rs_start, rs->rs_end - rs->rs_start);
}

range_seg_t *
range_tree_first(range_tree_t *rt)
{
	return (avl_first(&rt->rt_root));
}

uint64_t
range_tree_space(range_tree_t *rt)
{
	return (rt->rt_space);
}

/* Generic range tree functions for maintaining segments in an AVL tree. */
void
rt_avl_create(range_tree_t *rt, void *arg)
{
	avl_tree_t *tree = arg;

	avl_create(tree, rt->rt_avl_compare, sizeof (range_seg_t),
	    offsetof(range_seg_t, rs_pp_node));
}

void
rt_avl_destroy(range_tree_t *rt, void *arg)
{
	avl_tree_t *tree = arg;

	ASSERT0(avl_numnodes(tree));
	avl_destroy(tree);
}

void
rt_avl_add(range_tree_t *rt, range_seg_t *rs, void *arg)
{
	avl_tree_t *tree = arg;
	avl_add(tree, rs);
}

void
rt_avl_remove(range_tree_t *rt, range_seg_t *rs, void *arg)
{
	avl_tree_t *tree = arg;
	avl_remove(tree, rs);
}

void
rt_avl_vacate(range_tree_t *rt, void *arg)
{
	/*
	 * Normally one would walk the tree freeing nodes along the way.
	 * Since the nodes are shared with the range trees we can avoid
	 * walking all nodes and just reinitialize the avl tree. The nodes
	 * will be freed by the range tree, so we don't want to free them here.
	 */
	rt_avl_create(rt, arg);
}

boolean_t
range_tree_is_empty(range_tree_t *rt)
{
	ASSERT(rt != NULL);
	return (range_tree_space(rt) == 0);
}