1/* 2 * CDDL HEADER START 3 * 4 * The contents of this file are subject to the terms of the 5 * Common Development and Distribution License (the "License"). 6 * You may not use this file except in compliance with the License. 7 * 8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE 9 * or https://opensource.org/licenses/CDDL-1.0. 10 * See the License for the specific language governing permissions 11 * and limitations under the License. 12 * 13 * When distributing Covered Code, include this CDDL HEADER in each 14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE. 15 * If applicable, add the following below this CDDL HEADER, with the 16 * fields enclosed by brackets "[]" replaced with your own identifying 17 * information: Portions Copyright [yyyy] [name of copyright owner] 18 * 19 * CDDL HEADER END 20 */ 21/* 22 * Copyright 2009 Sun Microsystems, Inc. All rights reserved. 23 * Use is subject to license terms. 24 */ 25 26/* 27 * Copyright (c) 2013, 2019 by Delphix. All rights reserved. 28 */ 29 30#ifndef _SYS_RANGE_TREE_H 31#define _SYS_RANGE_TREE_H 32 33#include <sys/btree.h> 34#include <sys/dmu.h> 35 36#ifdef __cplusplus 37extern "C" { 38#endif 39 40#define RANGE_TREE_HISTOGRAM_SIZE 64 41 42typedef struct range_tree_ops range_tree_ops_t; 43 44typedef enum range_seg_type { 45 RANGE_SEG32, 46 RANGE_SEG64, 47 RANGE_SEG_GAP, 48 RANGE_SEG_NUM_TYPES, 49} range_seg_type_t; 50 51/* 52 * Note: the range_tree may not be accessed concurrently; consumers 53 * must provide external locking if required. 54 */ 55typedef struct range_tree { 56 zfs_btree_t rt_root; /* offset-ordered segment b-tree */ 57 uint64_t rt_space; /* sum of all segments in the map */ 58 range_seg_type_t rt_type; /* type of range_seg_t in use */ 59 /* 60 * All data that is stored in the range tree must have a start higher 61 * than or equal to rt_start, and all sizes and offsets must be 62 * multiples of 1 << rt_shift. 63 */ 64 uint8_t rt_shift; 65 uint64_t rt_start; 66 const range_tree_ops_t *rt_ops; 67 void *rt_arg; 68 uint64_t rt_gap; /* allowable inter-segment gap */ 69 70 /* 71 * The rt_histogram maintains a histogram of ranges. Each bucket, 72 * rt_histogram[i], contains the number of ranges whose size is: 73 * 2^i <= size of range in bytes < 2^(i+1) 74 */ 75 uint64_t rt_histogram[RANGE_TREE_HISTOGRAM_SIZE]; 76} range_tree_t; 77 78typedef struct range_seg32 { 79 uint32_t rs_start; /* starting offset of this segment */ 80 uint32_t rs_end; /* ending offset (non-inclusive) */ 81} range_seg32_t; 82 83/* 84 * Extremely large metaslabs, vdev-wide trees, and dnode-wide trees may 85 * require 64-bit integers for ranges. 86 */ 87typedef struct range_seg64 { 88 uint64_t rs_start; /* starting offset of this segment */ 89 uint64_t rs_end; /* ending offset (non-inclusive) */ 90} range_seg64_t; 91 92typedef struct range_seg_gap { 93 uint64_t rs_start; /* starting offset of this segment */ 94 uint64_t rs_end; /* ending offset (non-inclusive) */ 95 uint64_t rs_fill; /* actual fill if gap mode is on */ 96} range_seg_gap_t; 97 98/* 99 * This type needs to be the largest of the range segs, since it will be stack 100 * allocated and then cast the actual type to do tree operations. 101 */ 102typedef range_seg_gap_t range_seg_max_t; 103 104/* 105 * This is just for clarity of code purposes, so we can make it clear that a 106 * pointer is to a range seg of some type; when we need to do the actual math, 107 * we'll figure out the real type. 108 */ 109typedef void range_seg_t; 110 111struct range_tree_ops { 112 void (*rtop_create)(range_tree_t *rt, void *arg); 113 void (*rtop_destroy)(range_tree_t *rt, void *arg); 114 void (*rtop_add)(range_tree_t *rt, void *rs, void *arg); 115 void (*rtop_remove)(range_tree_t *rt, void *rs, void *arg); 116 void (*rtop_vacate)(range_tree_t *rt, void *arg); 117}; 118 119static inline uint64_t 120rs_get_start_raw(const range_seg_t *rs, const range_tree_t *rt) 121{ 122 ASSERT3U(rt->rt_type, <=, RANGE_SEG_NUM_TYPES); 123 switch (rt->rt_type) { 124 case RANGE_SEG32: 125 return (((const range_seg32_t *)rs)->rs_start); 126 case RANGE_SEG64: 127 return (((const range_seg64_t *)rs)->rs_start); 128 case RANGE_SEG_GAP: 129 return (((const range_seg_gap_t *)rs)->rs_start); 130 default: 131 VERIFY(0); 132 return (0); 133 } 134} 135 136static inline uint64_t 137rs_get_end_raw(const range_seg_t *rs, const range_tree_t *rt) 138{ 139 ASSERT3U(rt->rt_type, <=, RANGE_SEG_NUM_TYPES); 140 switch (rt->rt_type) { 141 case RANGE_SEG32: 142 return (((const range_seg32_t *)rs)->rs_end); 143 case RANGE_SEG64: 144 return (((const range_seg64_t *)rs)->rs_end); 145 case RANGE_SEG_GAP: 146 return (((const range_seg_gap_t *)rs)->rs_end); 147 default: 148 VERIFY(0); 149 return (0); 150 } 151} 152 153static inline uint64_t 154rs_get_fill_raw(const range_seg_t *rs, const range_tree_t *rt) 155{ 156 ASSERT3U(rt->rt_type, <=, RANGE_SEG_NUM_TYPES); 157 switch (rt->rt_type) { 158 case RANGE_SEG32: { 159 const range_seg32_t *r32 = (const range_seg32_t *)rs; 160 return (r32->rs_end - r32->rs_start); 161 } 162 case RANGE_SEG64: { 163 const range_seg64_t *r64 = (const range_seg64_t *)rs; 164 return (r64->rs_end - r64->rs_start); 165 } 166 case RANGE_SEG_GAP: 167 return (((const range_seg_gap_t *)rs)->rs_fill); 168 default: 169 VERIFY(0); 170 return (0); 171 } 172 173} 174 175static inline uint64_t 176rs_get_start(const range_seg_t *rs, const range_tree_t *rt) 177{ 178 return ((rs_get_start_raw(rs, rt) << rt->rt_shift) + rt->rt_start); 179} 180 181static inline uint64_t 182rs_get_end(const range_seg_t *rs, const range_tree_t *rt) 183{ 184 return ((rs_get_end_raw(rs, rt) << rt->rt_shift) + rt->rt_start); 185} 186 187static inline uint64_t 188rs_get_fill(const range_seg_t *rs, const range_tree_t *rt) 189{ 190 return (rs_get_fill_raw(rs, rt) << rt->rt_shift); 191} 192 193static inline void 194rs_set_start_raw(range_seg_t *rs, range_tree_t *rt, uint64_t start) 195{ 196 ASSERT3U(rt->rt_type, <=, RANGE_SEG_NUM_TYPES); 197 switch (rt->rt_type) { 198 case RANGE_SEG32: 199 ASSERT3U(start, <=, UINT32_MAX); 200 ((range_seg32_t *)rs)->rs_start = (uint32_t)start; 201 break; 202 case RANGE_SEG64: 203 ((range_seg64_t *)rs)->rs_start = start; 204 break; 205 case RANGE_SEG_GAP: 206 ((range_seg_gap_t *)rs)->rs_start = start; 207 break; 208 default: 209 VERIFY(0); 210 } 211} 212 213static inline void 214rs_set_end_raw(range_seg_t *rs, range_tree_t *rt, uint64_t end) 215{ 216 ASSERT3U(rt->rt_type, <=, RANGE_SEG_NUM_TYPES); 217 switch (rt->rt_type) { 218 case RANGE_SEG32: 219 ASSERT3U(end, <=, UINT32_MAX); 220 ((range_seg32_t *)rs)->rs_end = (uint32_t)end; 221 break; 222 case RANGE_SEG64: 223 ((range_seg64_t *)rs)->rs_end = end; 224 break; 225 case RANGE_SEG_GAP: 226 ((range_seg_gap_t *)rs)->rs_end = end; 227 break; 228 default: 229 VERIFY(0); 230 } 231} 232 233static inline void 234rs_set_fill_raw(range_seg_t *rs, range_tree_t *rt, uint64_t fill) 235{ 236 ASSERT3U(rt->rt_type, <=, RANGE_SEG_NUM_TYPES); 237 switch (rt->rt_type) { 238 case RANGE_SEG32: 239 /* fall through */ 240 case RANGE_SEG64: 241 ASSERT3U(fill, ==, rs_get_end_raw(rs, rt) - rs_get_start_raw(rs, 242 rt)); 243 break; 244 case RANGE_SEG_GAP: 245 ((range_seg_gap_t *)rs)->rs_fill = fill; 246 break; 247 default: 248 VERIFY(0); 249 } 250} 251 252static inline void 253rs_set_start(range_seg_t *rs, range_tree_t *rt, uint64_t start) 254{ 255 ASSERT3U(start, >=, rt->rt_start); 256 ASSERT(IS_P2ALIGNED(start, 1ULL << rt->rt_shift)); 257 rs_set_start_raw(rs, rt, (start - rt->rt_start) >> rt->rt_shift); 258} 259 260static inline void 261rs_set_end(range_seg_t *rs, range_tree_t *rt, uint64_t end) 262{ 263 ASSERT3U(end, >=, rt->rt_start); 264 ASSERT(IS_P2ALIGNED(end, 1ULL << rt->rt_shift)); 265 rs_set_end_raw(rs, rt, (end - rt->rt_start) >> rt->rt_shift); 266} 267 268static inline void 269rs_set_fill(range_seg_t *rs, range_tree_t *rt, uint64_t fill) 270{ 271 ASSERT(IS_P2ALIGNED(fill, 1ULL << rt->rt_shift)); 272 rs_set_fill_raw(rs, rt, fill >> rt->rt_shift); 273} 274 275typedef void range_tree_func_t(void *arg, uint64_t start, uint64_t size); 276 277range_tree_t *range_tree_create_gap(const range_tree_ops_t *ops, 278 range_seg_type_t type, void *arg, uint64_t start, uint64_t shift, 279 uint64_t gap); 280range_tree_t *range_tree_create(const range_tree_ops_t *ops, 281 range_seg_type_t type, void *arg, uint64_t start, uint64_t shift); 282void range_tree_destroy(range_tree_t *rt); 283boolean_t range_tree_contains(range_tree_t *rt, uint64_t start, uint64_t size); 284range_seg_t *range_tree_find(range_tree_t *rt, uint64_t start, uint64_t size); 285boolean_t range_tree_find_in(range_tree_t *rt, uint64_t start, uint64_t size, 286 uint64_t *ostart, uint64_t *osize); 287void range_tree_verify_not_present(range_tree_t *rt, 288 uint64_t start, uint64_t size); 289void range_tree_resize_segment(range_tree_t *rt, range_seg_t *rs, 290 uint64_t newstart, uint64_t newsize); 291uint64_t range_tree_space(range_tree_t *rt); 292uint64_t range_tree_numsegs(range_tree_t *rt); 293boolean_t range_tree_is_empty(range_tree_t *rt); 294void range_tree_swap(range_tree_t **rtsrc, range_tree_t **rtdst); 295void range_tree_stat_verify(range_tree_t *rt); 296uint64_t range_tree_min(range_tree_t *rt); 297uint64_t range_tree_max(range_tree_t *rt); 298uint64_t range_tree_span(range_tree_t *rt); 299 300void range_tree_add(void *arg, uint64_t start, uint64_t size); 301void range_tree_remove(void *arg, uint64_t start, uint64_t size); 302void range_tree_remove_fill(range_tree_t *rt, uint64_t start, uint64_t size); 303void range_tree_adjust_fill(range_tree_t *rt, range_seg_t *rs, int64_t delta); 304void range_tree_clear(range_tree_t *rt, uint64_t start, uint64_t size); 305 306void range_tree_vacate(range_tree_t *rt, range_tree_func_t *func, void *arg); 307void range_tree_walk(range_tree_t *rt, range_tree_func_t *func, void *arg); 308range_seg_t *range_tree_first(range_tree_t *rt); 309 310void range_tree_remove_xor_add_segment(uint64_t start, uint64_t end, 311 range_tree_t *removefrom, range_tree_t *addto); 312void range_tree_remove_xor_add(range_tree_t *rt, range_tree_t *removefrom, 313 range_tree_t *addto); 314 315#ifdef __cplusplus 316} 317#endif 318 319#endif /* _SYS_RANGE_TREE_H */ 320