1/* SPDX-License-Identifier: GPL-2.0-only */ 2/* 3 * Copyright (C) 2011 Red Hat, Inc. 4 * 5 * This file is released under the GPL. 6 */ 7#ifndef _LINUX_DM_BTREE_H 8#define _LINUX_DM_BTREE_H 9 10#include "dm-block-manager.h" 11 12struct dm_transaction_manager; 13 14/*----------------------------------------------------------------*/ 15 16/* 17 * Annotations used to check on-disk metadata is handled as little-endian. 18 */ 19#ifdef __CHECKER__ 20# define __dm_written_to_disk(x) __releases(x) 21# define __dm_reads_from_disk(x) __acquires(x) 22# define __dm_bless_for_disk(x) __acquire(x) 23# define __dm_unbless_for_disk(x) __release(x) 24#else 25# define __dm_written_to_disk(x) 26# define __dm_reads_from_disk(x) 27# define __dm_bless_for_disk(x) 28# define __dm_unbless_for_disk(x) 29#endif 30 31/*----------------------------------------------------------------*/ 32 33/* 34 * Manipulates hierarchical B+ trees with 64-bit keys and arbitrary-sized 35 * values. 36 */ 37 38/* 39 * Information about the values stored within the btree. 40 */ 41struct dm_btree_value_type { 42 void *context; 43 44 /* 45 * The size in bytes of each value. 46 */ 47 uint32_t size; 48 49 /* 50 * Any of these methods can be safely set to NULL if you do not 51 * need the corresponding feature. 52 */ 53 54 /* 55 * The btree is making a duplicate of a run of values, for instance 56 * because previously-shared btree nodes have now diverged. 57 * @value argument is the new copy that the copy function may modify. 58 * (Probably it just wants to increment a reference count 59 * somewhere.) This method is _not_ called for insertion of a new 60 * value: It is assumed the ref count is already 1. 61 */ 62 void (*inc)(void *context, const void *value, unsigned int count); 63 64 /* 65 * These values are being deleted. The btree takes care of freeing 66 * the memory pointed to by @value. Often the del function just 67 * needs to decrement a reference counts somewhere. 68 */ 69 void (*dec)(void *context, const void *value, unsigned int count); 70 71 /* 72 * A test for equality between two values. When a value is 73 * overwritten with a new one, the old one has the dec method 74 * called _unless_ the new and old value are deemed equal. 75 */ 76 int (*equal)(void *context, const void *value1, const void *value2); 77}; 78 79/* 80 * The shape and contents of a btree. 81 */ 82struct dm_btree_info { 83 struct dm_transaction_manager *tm; 84 85 /* 86 * Number of nested btrees. (Not the depth of a single tree.) 87 */ 88 unsigned int levels; 89 struct dm_btree_value_type value_type; 90}; 91 92/* 93 * Set up an empty tree. O(1). 94 */ 95int dm_btree_empty(struct dm_btree_info *info, dm_block_t *root); 96 97/* 98 * Delete a tree. O(n) - this is the slow one! It can also block, so 99 * please don't call it on an IO path. 100 */ 101int dm_btree_del(struct dm_btree_info *info, dm_block_t root); 102 103/* 104 * All the lookup functions return -ENODATA if the key cannot be found. 105 */ 106 107/* 108 * Tries to find a key that matches exactly. O(ln(n)) 109 */ 110int dm_btree_lookup(struct dm_btree_info *info, dm_block_t root, 111 uint64_t *keys, void *value_le); 112 113/* 114 * Tries to find the first key where the bottom level key is >= to that 115 * given. Useful for skipping empty sections of the btree. 116 */ 117int dm_btree_lookup_next(struct dm_btree_info *info, dm_block_t root, 118 uint64_t *keys, uint64_t *rkey, void *value_le); 119 120/* 121 * Insertion (or overwrite an existing value). O(ln(n)) 122 */ 123int dm_btree_insert(struct dm_btree_info *info, dm_block_t root, 124 uint64_t *keys, void *value, dm_block_t *new_root) 125 __dm_written_to_disk(value); 126 127/* 128 * A variant of insert that indicates whether it actually inserted or just 129 * overwrote. Useful if you're keeping track of the number of entries in a 130 * tree. 131 */ 132int dm_btree_insert_notify(struct dm_btree_info *info, dm_block_t root, 133 uint64_t *keys, void *value, dm_block_t *new_root, 134 int *inserted) 135 __dm_written_to_disk(value); 136 137/* 138 * Remove a key if present. This doesn't remove empty sub trees. Normally 139 * subtrees represent a separate entity, like a snapshot map, so this is 140 * correct behaviour. O(ln(n)). 141 */ 142int dm_btree_remove(struct dm_btree_info *info, dm_block_t root, 143 uint64_t *keys, dm_block_t *new_root); 144 145/* 146 * Removes a _contiguous_ run of values starting from 'keys' and not 147 * reaching keys2 (where keys2 is keys with the final key replaced with 148 * 'end_key'). 'end_key' is the one-past-the-end value. 'keys' may be 149 * altered. 150 */ 151int dm_btree_remove_leaves(struct dm_btree_info *info, dm_block_t root, 152 uint64_t *keys, uint64_t end_key, 153 dm_block_t *new_root, unsigned int *nr_removed); 154 155/* 156 * Returns < 0 on failure. Otherwise the number of key entries that have 157 * been filled out. Remember trees can have zero entries, and as such have 158 * no lowest key. 159 */ 160int dm_btree_find_lowest_key(struct dm_btree_info *info, dm_block_t root, 161 uint64_t *result_keys); 162 163/* 164 * Returns < 0 on failure. Otherwise the number of key entries that have 165 * been filled out. Remember trees can have zero entries, and as such have 166 * no highest key. 167 */ 168int dm_btree_find_highest_key(struct dm_btree_info *info, dm_block_t root, 169 uint64_t *result_keys); 170 171/* 172 * Iterate through the a btree, calling fn() on each entry. 173 * It only works for single level trees and is internally recursive, so 174 * monitor stack usage carefully. 175 */ 176int dm_btree_walk(struct dm_btree_info *info, dm_block_t root, 177 int (*fn)(void *context, uint64_t *keys, void *leaf), 178 void *context); 179 180 181/*----------------------------------------------------------------*/ 182 183/* 184 * Cursor API. This does not follow the rolling lock convention. Since we 185 * know the order that values are required we can issue prefetches to speed 186 * up iteration. Use on a single level btree only. 187 */ 188#define DM_BTREE_CURSOR_MAX_DEPTH 16 189 190struct cursor_node { 191 struct dm_block *b; 192 unsigned int index; 193}; 194 195struct dm_btree_cursor { 196 struct dm_btree_info *info; 197 dm_block_t root; 198 199 bool prefetch_leaves; 200 unsigned int depth; 201 struct cursor_node nodes[DM_BTREE_CURSOR_MAX_DEPTH]; 202}; 203 204/* 205 * Creates a fresh cursor. If prefetch_leaves is set then it is assumed 206 * the btree contains block indexes that will be prefetched. The cursor is 207 * quite large, so you probably don't want to put it on the stack. 208 */ 209int dm_btree_cursor_begin(struct dm_btree_info *info, dm_block_t root, 210 bool prefetch_leaves, struct dm_btree_cursor *c); 211void dm_btree_cursor_end(struct dm_btree_cursor *c); 212int dm_btree_cursor_next(struct dm_btree_cursor *c); 213int dm_btree_cursor_skip(struct dm_btree_cursor *c, uint32_t count); 214int dm_btree_cursor_get_value(struct dm_btree_cursor *c, uint64_t *key, void *value_le); 215 216#endif /* _LINUX_DM_BTREE_H */ 217