1258717Savg/* 2258717Savg * CDDL HEADER START 3258717Savg * 4258717Savg * The contents of this file are subject to the terms of the 5258717Savg * Common Development and Distribution License (the "License"). 6258717Savg * You may not use this file except in compliance with the License. 7258717Savg * 8258717Savg * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE 9258717Savg * or http://www.opensolaris.org/os/licensing. 10258717Savg * See the License for the specific language governing permissions 11258717Savg * and limitations under the License. 12258717Savg * 13258717Savg * When distributing Covered Code, include this CDDL HEADER in each 14258717Savg * file and include the License file at usr/src/OPENSOLARIS.LICENSE. 15258717Savg * If applicable, add the following below this CDDL HEADER, with the 16258717Savg * fields enclosed by brackets "[]" replaced with your own identifying 17258717Savg * information: Portions Copyright [yyyy] [name of copyright owner] 18258717Savg * 19258717Savg * CDDL HEADER END 20258717Savg */ 21258717Savg/* 22258717Savg * Copyright 2009 Sun Microsystems, Inc. All rights reserved. 23258717Savg * Use is subject to license terms. 24258717Savg */ 25258717Savg/* 26258717Savg * Copyright (c) 2013 by Delphix. All rights reserved. 27258717Savg */ 28258717Savg 29258717Savg#include <sys/zfs_context.h> 30258717Savg#include <sys/range_tree.h> 31258717Savg#include <sys/space_reftree.h> 32258717Savg 33258717Savg/* 34258717Savg * Space reference trees. 35258717Savg * 36258717Savg * A range tree is a collection of integers. Every integer is either 37258717Savg * in the tree, or it's not. A space reference tree generalizes 38258717Savg * the idea: it allows its members to have arbitrary reference counts, 39258717Savg * as opposed to the implicit reference count of 0 or 1 in a range tree. 40258717Savg * This representation comes in handy when computing the union or 41258717Savg * intersection of multiple space maps. For example, the union of 42258717Savg * N range trees is the subset of the reference tree with refcnt >= 1. 43258717Savg * The intersection of N range trees is the subset with refcnt >= N. 44258717Savg * 45258717Savg * [It's very much like a Fourier transform. Unions and intersections 46258717Savg * are hard to perform in the 'range tree domain', so we convert the trees 47258717Savg * into the 'reference count domain', where it's trivial, then invert.] 48258717Savg * 49258717Savg * vdev_dtl_reassess() uses computations of this form to determine 50258717Savg * DTL_MISSING and DTL_OUTAGE for interior vdevs -- e.g. a RAID-Z vdev 51258717Savg * has an outage wherever refcnt >= vdev_nparity + 1, and a mirror vdev 52258717Savg * has an outage wherever refcnt >= vdev_children. 53258717Savg */ 54258717Savgstatic int 55258717Savgspace_reftree_compare(const void *x1, const void *x2) 56258717Savg{ 57258717Savg const space_ref_t *sr1 = x1; 58258717Savg const space_ref_t *sr2 = x2; 59258717Savg 60258717Savg if (sr1->sr_offset < sr2->sr_offset) 61258717Savg return (-1); 62258717Savg if (sr1->sr_offset > sr2->sr_offset) 63258717Savg return (1); 64258717Savg 65258717Savg if (sr1 < sr2) 66258717Savg return (-1); 67258717Savg if (sr1 > sr2) 68258717Savg return (1); 69258717Savg 70258717Savg return (0); 71258717Savg} 72258717Savg 73258717Savgvoid 74258717Savgspace_reftree_create(avl_tree_t *t) 75258717Savg{ 76258717Savg avl_create(t, space_reftree_compare, 77258717Savg sizeof (space_ref_t), offsetof(space_ref_t, sr_node)); 78258717Savg} 79258717Savg 80258717Savgvoid 81258717Savgspace_reftree_destroy(avl_tree_t *t) 82258717Savg{ 83258717Savg space_ref_t *sr; 84258717Savg void *cookie = NULL; 85258717Savg 86258717Savg while ((sr = avl_destroy_nodes(t, &cookie)) != NULL) 87258717Savg kmem_free(sr, sizeof (*sr)); 88258717Savg 89258717Savg avl_destroy(t); 90258717Savg} 91258717Savg 92258717Savgstatic void 93258717Savgspace_reftree_add_node(avl_tree_t *t, uint64_t offset, int64_t refcnt) 94258717Savg{ 95258717Savg space_ref_t *sr; 96258717Savg 97258717Savg sr = kmem_alloc(sizeof (*sr), KM_SLEEP); 98258717Savg sr->sr_offset = offset; 99258717Savg sr->sr_refcnt = refcnt; 100258717Savg 101258717Savg avl_add(t, sr); 102258717Savg} 103258717Savg 104258717Savgvoid 105258717Savgspace_reftree_add_seg(avl_tree_t *t, uint64_t start, uint64_t end, 106258717Savg int64_t refcnt) 107258717Savg{ 108258717Savg space_reftree_add_node(t, start, refcnt); 109258717Savg space_reftree_add_node(t, end, -refcnt); 110258717Savg} 111258717Savg 112258717Savg/* 113258717Savg * Convert (or add) a range tree into a reference tree. 114258717Savg */ 115258717Savgvoid 116258717Savgspace_reftree_add_map(avl_tree_t *t, range_tree_t *rt, int64_t refcnt) 117258717Savg{ 118258717Savg range_seg_t *rs; 119258717Savg 120258717Savg ASSERT(MUTEX_HELD(rt->rt_lock)); 121258717Savg 122258717Savg for (rs = avl_first(&rt->rt_root); rs; rs = AVL_NEXT(&rt->rt_root, rs)) 123258717Savg space_reftree_add_seg(t, rs->rs_start, rs->rs_end, refcnt); 124258717Savg} 125258717Savg 126258717Savg/* 127258717Savg * Convert a reference tree into a range tree. The range tree will contain 128258717Savg * all members of the reference tree for which refcnt >= minref. 129258717Savg */ 130258717Savgvoid 131258717Savgspace_reftree_generate_map(avl_tree_t *t, range_tree_t *rt, int64_t minref) 132258717Savg{ 133258717Savg uint64_t start = -1ULL; 134258717Savg int64_t refcnt = 0; 135258717Savg space_ref_t *sr; 136258717Savg 137258717Savg ASSERT(MUTEX_HELD(rt->rt_lock)); 138258717Savg 139258717Savg range_tree_vacate(rt, NULL, NULL); 140258717Savg 141258717Savg for (sr = avl_first(t); sr != NULL; sr = AVL_NEXT(t, sr)) { 142258717Savg refcnt += sr->sr_refcnt; 143258717Savg if (refcnt >= minref) { 144258717Savg if (start == -1ULL) { 145258717Savg start = sr->sr_offset; 146258717Savg } 147258717Savg } else { 148258717Savg if (start != -1ULL) { 149258717Savg uint64_t end = sr->sr_offset; 150258717Savg ASSERT(start <= end); 151258717Savg if (end > start) 152258717Savg range_tree_add(rt, start, end - start); 153258717Savg start = -1ULL; 154258717Savg } 155258717Savg } 156258717Savg } 157258717Savg ASSERT(refcnt == 0); 158258717Savg ASSERT(start == -1ULL); 159258717Savg} 160