1168404Spjd/*
2168404Spjd * CDDL HEADER START
3168404Spjd *
4168404Spjd * The contents of this file are subject to the terms of the
5168404Spjd * Common Development and Distribution License (the "License").
6168404Spjd * You may not use this file except in compliance with the License.
7168404Spjd *
8168404Spjd * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9168404Spjd * or http://www.opensolaris.org/os/licensing.
10168404Spjd * See the License for the specific language governing permissions
11168404Spjd * and limitations under the License.
12168404Spjd *
13168404Spjd * When distributing Covered Code, include this CDDL HEADER in each
14168404Spjd * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15168404Spjd * If applicable, add the following below this CDDL HEADER, with the
16168404Spjd * fields enclosed by brackets "[]" replaced with your own identifying
17168404Spjd * information: Portions Copyright [yyyy] [name of copyright owner]
18168404Spjd *
19168404Spjd * CDDL HEADER END
20168404Spjd */
21168404Spjd/*
22219089Spjd * Copyright 2009 Sun Microsystems, Inc.  All rights reserved.
23168404Spjd * Use is subject to license terms.
24168404Spjd */
25168404Spjd
26168404Spjd/*
27269229Sdelphij * Copyright (c) 2014 by Delphix. All rights reserved.
28287703Sdelphij * Copyright 2015 Nexenta Systems, Inc.  All rights reserved.
29269229Sdelphij */
30269229Sdelphij
31269229Sdelphij/*
32168404Spjd * AVL - generic AVL tree implementation for kernel use
33168404Spjd *
34168404Spjd * A complete description of AVL trees can be found in many CS textbooks.
35168404Spjd *
36168404Spjd * Here is a very brief overview. An AVL tree is a binary search tree that is
37168404Spjd * almost perfectly balanced. By "almost" perfectly balanced, we mean that at
38168404Spjd * any given node, the left and right subtrees are allowed to differ in height
39168404Spjd * by at most 1 level.
40168404Spjd *
41168404Spjd * This relaxation from a perfectly balanced binary tree allows doing
42168404Spjd * insertion and deletion relatively efficiently. Searching the tree is
43168404Spjd * still a fast operation, roughly O(log(N)).
44168404Spjd *
45264836Sdelphij * The key to insertion and deletion is a set of tree manipulations called
46168404Spjd * rotations, which bring unbalanced subtrees back into the semi-balanced state.
47168404Spjd *
48168404Spjd * This implementation of AVL trees has the following peculiarities:
49168404Spjd *
50168404Spjd *	- The AVL specific data structures are physically embedded as fields
51168404Spjd *	  in the "using" data structures.  To maintain generality the code
52168404Spjd *	  must constantly translate between "avl_node_t *" and containing
53264836Sdelphij *	  data structure "void *"s by adding/subtracting the avl_offset.
54168404Spjd *
55168404Spjd *	- Since the AVL data is always embedded in other structures, there is
56168404Spjd *	  no locking or memory allocation in the AVL routines. This must be
57168404Spjd *	  provided for by the enclosing data structure's semantics. Typically,
58168404Spjd *	  avl_insert()/_add()/_remove()/avl_insert_here() require some kind of
59168404Spjd *	  exclusive write lock. Other operations require a read lock.
60168404Spjd *
61168404Spjd *      - The implementation uses iteration instead of explicit recursion,
62168404Spjd *	  since it is intended to run on limited size kernel stacks. Since
63168404Spjd *	  there is no recursion stack present to move "up" in the tree,
64168404Spjd *	  there is an explicit "parent" link in the avl_node_t.
65168404Spjd *
66168404Spjd *      - The left/right children pointers of a node are in an array.
67168404Spjd *	  In the code, variables (instead of constants) are used to represent
68168404Spjd *	  left and right indices.  The implementation is written as if it only
69168404Spjd *	  dealt with left handed manipulations.  By changing the value assigned
70168404Spjd *	  to "left", the code also works for right handed trees.  The
71168404Spjd *	  following variables/terms are frequently used:
72168404Spjd *
73168404Spjd *		int left;	// 0 when dealing with left children,
74168404Spjd *				// 1 for dealing with right children
75168404Spjd *
76168404Spjd *		int left_heavy;	// -1 when left subtree is taller at some node,
77168404Spjd *				// +1 when right subtree is taller
78168404Spjd *
79168404Spjd *		int right;	// will be the opposite of left (0 or 1)
80168404Spjd *		int right_heavy;// will be the opposite of left_heavy (-1 or 1)
81168404Spjd *
82168404Spjd *		int direction;  // 0 for "<" (ie. left child); 1 for ">" (right)
83168404Spjd *
84168404Spjd *	  Though it is a little more confusing to read the code, the approach
85168404Spjd *	  allows using half as much code (and hence cache footprint) for tree
86168404Spjd *	  manipulations and eliminates many conditional branches.
87168404Spjd *
88168404Spjd *	- The avl_index_t is an opaque "cookie" used to find nodes at or
89168404Spjd *	  adjacent to where a new value would be inserted in the tree. The value
90168404Spjd *	  is a modified "avl_node_t *".  The bottom bit (normally 0 for a
91168404Spjd *	  pointer) is set to indicate if that the new node has a value greater
92168404Spjd *	  than the value of the indicated "avl_node_t *".
93269229Sdelphij *
94269229Sdelphij * Note - in addition to userland (e.g. libavl and libutil) and the kernel
95269229Sdelphij * (e.g. genunix), avl.c is compiled into ld.so and kmdb's genunix module,
96269229Sdelphij * which each have their own compilation environments and subsequent
97269229Sdelphij * requirements. Each of these environments must be considered when adding
98269229Sdelphij * dependencies from avl.c.
99168404Spjd */
100168404Spjd
101168404Spjd#include <sys/types.h>
102168404Spjd#include <sys/param.h>
103174046Sjb#include <sys/stdint.h>
104168404Spjd#include <sys/debug.h>
105168404Spjd#include <sys/avl.h>
106168404Spjd
107168404Spjd/*
108264836Sdelphij * Small arrays to translate between balance (or diff) values and child indices.
109168404Spjd *
110168404Spjd * Code that deals with binary tree data structures will randomly use
111168404Spjd * left and right children when examining a tree.  C "if()" statements
112168404Spjd * which evaluate randomly suffer from very poor hardware branch prediction.
113168404Spjd * In this code we avoid some of the branch mispredictions by using the
114168404Spjd * following translation arrays. They replace random branches with an
115168404Spjd * additional memory reference. Since the translation arrays are both very
116168404Spjd * small the data should remain efficiently in cache.
117168404Spjd */
118168404Spjdstatic const int  avl_child2balance[2]	= {-1, 1};
119168404Spjdstatic const int  avl_balance2child[]	= {0, 0, 1};
120168404Spjd
121168404Spjd
122168404Spjd/*
123168404Spjd * Walk from one node to the previous valued node (ie. an infix walk
124168404Spjd * towards the left). At any given node we do one of 2 things:
125168404Spjd *
126168404Spjd * - If there is a left child, go to it, then to it's rightmost descendant.
127168404Spjd *
128264836Sdelphij * - otherwise we return through parent nodes until we've come from a right
129264836Sdelphij *   child.
130168404Spjd *
131168404Spjd * Return Value:
132168404Spjd * NULL - if at the end of the nodes
133168404Spjd * otherwise next node
134168404Spjd */
135168404Spjdvoid *
136168404Spjdavl_walk(avl_tree_t *tree, void	*oldnode, int left)
137168404Spjd{
138168404Spjd	size_t off = tree->avl_offset;
139168404Spjd	avl_node_t *node = AVL_DATA2NODE(oldnode, off);
140168404Spjd	int right = 1 - left;
141168404Spjd	int was_child;
142168404Spjd
143168404Spjd
144168404Spjd	/*
145168404Spjd	 * nowhere to walk to if tree is empty
146168404Spjd	 */
147168404Spjd	if (node == NULL)
148168404Spjd		return (NULL);
149168404Spjd
150168404Spjd	/*
151168404Spjd	 * Visit the previous valued node. There are two possibilities:
152168404Spjd	 *
153168404Spjd	 * If this node has a left child, go down one left, then all
154168404Spjd	 * the way right.
155168404Spjd	 */
156168404Spjd	if (node->avl_child[left] != NULL) {
157168404Spjd		for (node = node->avl_child[left];
158168404Spjd		    node->avl_child[right] != NULL;
159168404Spjd		    node = node->avl_child[right])
160168404Spjd			;
161168404Spjd	/*
162168404Spjd	 * Otherwise, return thru left children as far as we can.
163168404Spjd	 */
164168404Spjd	} else {
165168404Spjd		for (;;) {
166168404Spjd			was_child = AVL_XCHILD(node);
167168404Spjd			node = AVL_XPARENT(node);
168168404Spjd			if (node == NULL)
169168404Spjd				return (NULL);
170168404Spjd			if (was_child == right)
171168404Spjd				break;
172168404Spjd		}
173168404Spjd	}
174168404Spjd
175168404Spjd	return (AVL_NODE2DATA(node, off));
176168404Spjd}
177168404Spjd
178168404Spjd/*
179168404Spjd * Return the lowest valued node in a tree or NULL.
180168404Spjd * (leftmost child from root of tree)
181168404Spjd */
182168404Spjdvoid *
183168404Spjdavl_first(avl_tree_t *tree)
184168404Spjd{
185168404Spjd	avl_node_t *node;
186168404Spjd	avl_node_t *prev = NULL;
187168404Spjd	size_t off = tree->avl_offset;
188168404Spjd
189168404Spjd	for (node = tree->avl_root; node != NULL; node = node->avl_child[0])
190168404Spjd		prev = node;
191168404Spjd
192168404Spjd	if (prev != NULL)
193168404Spjd		return (AVL_NODE2DATA(prev, off));
194168404Spjd	return (NULL);
195168404Spjd}
196168404Spjd
197168404Spjd/*
198168404Spjd * Return the highest valued node in a tree or NULL.
199168404Spjd * (rightmost child from root of tree)
200168404Spjd */
201168404Spjdvoid *
202168404Spjdavl_last(avl_tree_t *tree)
203168404Spjd{
204168404Spjd	avl_node_t *node;
205168404Spjd	avl_node_t *prev = NULL;
206168404Spjd	size_t off = tree->avl_offset;
207168404Spjd
208168404Spjd	for (node = tree->avl_root; node != NULL; node = node->avl_child[1])
209168404Spjd		prev = node;
210168404Spjd
211168404Spjd	if (prev != NULL)
212168404Spjd		return (AVL_NODE2DATA(prev, off));
213168404Spjd	return (NULL);
214168404Spjd}
215168404Spjd
216168404Spjd/*
217168404Spjd * Access the node immediately before or after an insertion point.
218168404Spjd *
219168404Spjd * "avl_index_t" is a (avl_node_t *) with the bottom bit indicating a child
220168404Spjd *
221168404Spjd * Return value:
222168404Spjd *	NULL: no node in the given direction
223168404Spjd *	"void *"  of the found tree node
224168404Spjd */
225168404Spjdvoid *
226168404Spjdavl_nearest(avl_tree_t *tree, avl_index_t where, int direction)
227168404Spjd{
228168404Spjd	int child = AVL_INDEX2CHILD(where);
229168404Spjd	avl_node_t *node = AVL_INDEX2NODE(where);
230168404Spjd	void *data;
231168404Spjd	size_t off = tree->avl_offset;
232168404Spjd
233168404Spjd	if (node == NULL) {
234168404Spjd		ASSERT(tree->avl_root == NULL);
235168404Spjd		return (NULL);
236168404Spjd	}
237168404Spjd	data = AVL_NODE2DATA(node, off);
238168404Spjd	if (child != direction)
239168404Spjd		return (data);
240168404Spjd
241168404Spjd	return (avl_walk(tree, data, direction));
242168404Spjd}
243168404Spjd
244168404Spjd
245168404Spjd/*
246168404Spjd * Search for the node which contains "value".  The algorithm is a
247168404Spjd * simple binary tree search.
248168404Spjd *
249168404Spjd * return value:
250168404Spjd *	NULL: the value is not in the AVL tree
251168404Spjd *		*where (if not NULL)  is set to indicate the insertion point
252168404Spjd *	"void *"  of the found tree node
253168404Spjd */
254168404Spjdvoid *
255219089Spjdavl_find(avl_tree_t *tree, const void *value, avl_index_t *where)
256168404Spjd{
257168404Spjd	avl_node_t *node;
258168404Spjd	avl_node_t *prev = NULL;
259168404Spjd	int child = 0;
260168404Spjd	int diff;
261168404Spjd	size_t off = tree->avl_offset;
262168404Spjd
263168404Spjd	for (node = tree->avl_root; node != NULL;
264168404Spjd	    node = node->avl_child[child]) {
265168404Spjd
266168404Spjd		prev = node;
267168404Spjd
268168404Spjd		diff = tree->avl_compar(value, AVL_NODE2DATA(node, off));
269168404Spjd		ASSERT(-1 <= diff && diff <= 1);
270168404Spjd		if (diff == 0) {
271168404Spjd#ifdef DEBUG
272168404Spjd			if (where != NULL)
273168404Spjd				*where = 0;
274168404Spjd#endif
275168404Spjd			return (AVL_NODE2DATA(node, off));
276168404Spjd		}
277168404Spjd		child = avl_balance2child[1 + diff];
278168404Spjd
279168404Spjd	}
280168404Spjd
281168404Spjd	if (where != NULL)
282168404Spjd		*where = AVL_MKINDEX(prev, child);
283168404Spjd
284168404Spjd	return (NULL);
285168404Spjd}
286168404Spjd
287168404Spjd
288168404Spjd/*
289168404Spjd * Perform a rotation to restore balance at the subtree given by depth.
290168404Spjd *
291168404Spjd * This routine is used by both insertion and deletion. The return value
292168404Spjd * indicates:
293168404Spjd *	 0 : subtree did not change height
294168404Spjd *	!0 : subtree was reduced in height
295168404Spjd *
296168404Spjd * The code is written as if handling left rotations, right rotations are
297168404Spjd * symmetric and handled by swapping values of variables right/left[_heavy]
298168404Spjd *
299168404Spjd * On input balance is the "new" balance at "node". This value is either
300168404Spjd * -2 or +2.
301168404Spjd */
302168404Spjdstatic int
303168404Spjdavl_rotation(avl_tree_t *tree, avl_node_t *node, int balance)
304168404Spjd{
305168404Spjd	int left = !(balance < 0);	/* when balance = -2, left will be 0 */
306168404Spjd	int right = 1 - left;
307168404Spjd	int left_heavy = balance >> 1;
308168404Spjd	int right_heavy = -left_heavy;
309168404Spjd	avl_node_t *parent = AVL_XPARENT(node);
310168404Spjd	avl_node_t *child = node->avl_child[left];
311168404Spjd	avl_node_t *cright;
312168404Spjd	avl_node_t *gchild;
313168404Spjd	avl_node_t *gright;
314168404Spjd	avl_node_t *gleft;
315168404Spjd	int which_child = AVL_XCHILD(node);
316168404Spjd	int child_bal = AVL_XBALANCE(child);
317168404Spjd
318168404Spjd	/* BEGIN CSTYLED */
319168404Spjd	/*
320168404Spjd	 * case 1 : node is overly left heavy, the left child is balanced or
321168404Spjd	 * also left heavy. This requires the following rotation.
322168404Spjd	 *
323168404Spjd	 *                   (node bal:-2)
324168404Spjd	 *                    /           \
325168404Spjd	 *                   /             \
326168404Spjd	 *              (child bal:0 or -1)
327168404Spjd	 *              /    \
328168404Spjd	 *             /      \
329168404Spjd	 *                     cright
330168404Spjd	 *
331168404Spjd	 * becomes:
332168404Spjd	 *
333168404Spjd	 *              (child bal:1 or 0)
334168404Spjd	 *              /        \
335168404Spjd	 *             /          \
336168404Spjd	 *                        (node bal:-1 or 0)
337168404Spjd	 *                         /     \
338168404Spjd	 *                        /       \
339168404Spjd	 *                     cright
340168404Spjd	 *
341168404Spjd	 * we detect this situation by noting that child's balance is not
342168404Spjd	 * right_heavy.
343168404Spjd	 */
344168404Spjd	/* END CSTYLED */
345168404Spjd	if (child_bal != right_heavy) {
346168404Spjd
347168404Spjd		/*
348168404Spjd		 * compute new balance of nodes
349168404Spjd		 *
350168404Spjd		 * If child used to be left heavy (now balanced) we reduced
351168404Spjd		 * the height of this sub-tree -- used in "return...;" below
352168404Spjd		 */
353168404Spjd		child_bal += right_heavy; /* adjust towards right */
354168404Spjd
355168404Spjd		/*
356168404Spjd		 * move "cright" to be node's left child
357168404Spjd		 */
358168404Spjd		cright = child->avl_child[right];
359168404Spjd		node->avl_child[left] = cright;
360168404Spjd		if (cright != NULL) {
361168404Spjd			AVL_SETPARENT(cright, node);
362168404Spjd			AVL_SETCHILD(cright, left);
363168404Spjd		}
364168404Spjd
365168404Spjd		/*
366168404Spjd		 * move node to be child's right child
367168404Spjd		 */
368168404Spjd		child->avl_child[right] = node;
369168404Spjd		AVL_SETBALANCE(node, -child_bal);
370168404Spjd		AVL_SETCHILD(node, right);
371168404Spjd		AVL_SETPARENT(node, child);
372168404Spjd
373168404Spjd		/*
374168404Spjd		 * update the pointer into this subtree
375168404Spjd		 */
376168404Spjd		AVL_SETBALANCE(child, child_bal);
377168404Spjd		AVL_SETCHILD(child, which_child);
378168404Spjd		AVL_SETPARENT(child, parent);
379168404Spjd		if (parent != NULL)
380168404Spjd			parent->avl_child[which_child] = child;
381168404Spjd		else
382168404Spjd			tree->avl_root = child;
383168404Spjd
384168404Spjd		return (child_bal == 0);
385168404Spjd	}
386168404Spjd
387168404Spjd	/* BEGIN CSTYLED */
388168404Spjd	/*
389168404Spjd	 * case 2 : When node is left heavy, but child is right heavy we use
390168404Spjd	 * a different rotation.
391168404Spjd	 *
392168404Spjd	 *                   (node b:-2)
393168404Spjd	 *                    /   \
394168404Spjd	 *                   /     \
395168404Spjd	 *                  /       \
396168404Spjd	 *             (child b:+1)
397168404Spjd	 *              /     \
398168404Spjd	 *             /       \
399168404Spjd	 *                   (gchild b: != 0)
400168404Spjd	 *                     /  \
401168404Spjd	 *                    /    \
402168404Spjd	 *                 gleft   gright
403168404Spjd	 *
404168404Spjd	 * becomes:
405168404Spjd	 *
406168404Spjd	 *              (gchild b:0)
407168404Spjd	 *              /       \
408168404Spjd	 *             /         \
409168404Spjd	 *            /           \
410168404Spjd	 *        (child b:?)   (node b:?)
411168404Spjd	 *         /  \          /   \
412168404Spjd	 *        /    \        /     \
413168404Spjd	 *            gleft   gright
414168404Spjd	 *
415168404Spjd	 * computing the new balances is more complicated. As an example:
416168404Spjd	 *	 if gchild was right_heavy, then child is now left heavy
417168404Spjd	 *		else it is balanced
418168404Spjd	 */
419168404Spjd	/* END CSTYLED */
420168404Spjd	gchild = child->avl_child[right];
421168404Spjd	gleft = gchild->avl_child[left];
422168404Spjd	gright = gchild->avl_child[right];
423168404Spjd
424168404Spjd	/*
425168404Spjd	 * move gright to left child of node and
426168404Spjd	 *
427168404Spjd	 * move gleft to right child of node
428168404Spjd	 */
429168404Spjd	node->avl_child[left] = gright;
430168404Spjd	if (gright != NULL) {
431168404Spjd		AVL_SETPARENT(gright, node);
432168404Spjd		AVL_SETCHILD(gright, left);
433168404Spjd	}
434168404Spjd
435168404Spjd	child->avl_child[right] = gleft;
436168404Spjd	if (gleft != NULL) {
437168404Spjd		AVL_SETPARENT(gleft, child);
438168404Spjd		AVL_SETCHILD(gleft, right);
439168404Spjd	}
440168404Spjd
441168404Spjd	/*
442168404Spjd	 * move child to left child of gchild and
443168404Spjd	 *
444168404Spjd	 * move node to right child of gchild and
445168404Spjd	 *
446168404Spjd	 * fixup parent of all this to point to gchild
447168404Spjd	 */
448168404Spjd	balance = AVL_XBALANCE(gchild);
449168404Spjd	gchild->avl_child[left] = child;
450168404Spjd	AVL_SETBALANCE(child, (balance == right_heavy ? left_heavy : 0));
451168404Spjd	AVL_SETPARENT(child, gchild);
452168404Spjd	AVL_SETCHILD(child, left);
453168404Spjd
454168404Spjd	gchild->avl_child[right] = node;
455168404Spjd	AVL_SETBALANCE(node, (balance == left_heavy ? right_heavy : 0));
456168404Spjd	AVL_SETPARENT(node, gchild);
457168404Spjd	AVL_SETCHILD(node, right);
458168404Spjd
459168404Spjd	AVL_SETBALANCE(gchild, 0);
460168404Spjd	AVL_SETPARENT(gchild, parent);
461168404Spjd	AVL_SETCHILD(gchild, which_child);
462168404Spjd	if (parent != NULL)
463168404Spjd		parent->avl_child[which_child] = gchild;
464168404Spjd	else
465168404Spjd		tree->avl_root = gchild;
466168404Spjd
467168404Spjd	return (1);	/* the new tree is always shorter */
468168404Spjd}
469168404Spjd
470168404Spjd
471168404Spjd/*
472168404Spjd * Insert a new node into an AVL tree at the specified (from avl_find()) place.
473168404Spjd *
474168404Spjd * Newly inserted nodes are always leaf nodes in the tree, since avl_find()
475168404Spjd * searches out to the leaf positions.  The avl_index_t indicates the node
476168404Spjd * which will be the parent of the new node.
477168404Spjd *
478168404Spjd * After the node is inserted, a single rotation further up the tree may
479168404Spjd * be necessary to maintain an acceptable AVL balance.
480168404Spjd */
481168404Spjdvoid
482168404Spjdavl_insert(avl_tree_t *tree, void *new_data, avl_index_t where)
483168404Spjd{
484168404Spjd	avl_node_t *node;
485168404Spjd	avl_node_t *parent = AVL_INDEX2NODE(where);
486168404Spjd	int old_balance;
487168404Spjd	int new_balance;
488168404Spjd	int which_child = AVL_INDEX2CHILD(where);
489168404Spjd	size_t off = tree->avl_offset;
490168404Spjd
491168404Spjd	ASSERT(tree);
492168404Spjd#ifdef _LP64
493168404Spjd	ASSERT(((uintptr_t)new_data & 0x7) == 0);
494168404Spjd#endif
495168404Spjd
496168404Spjd	node = AVL_DATA2NODE(new_data, off);
497168404Spjd
498168404Spjd	/*
499168404Spjd	 * First, add the node to the tree at the indicated position.
500168404Spjd	 */
501168404Spjd	++tree->avl_numnodes;
502168404Spjd
503168404Spjd	node->avl_child[0] = NULL;
504168404Spjd	node->avl_child[1] = NULL;
505168404Spjd
506168404Spjd	AVL_SETCHILD(node, which_child);
507168404Spjd	AVL_SETBALANCE(node, 0);
508168404Spjd	AVL_SETPARENT(node, parent);
509168404Spjd	if (parent != NULL) {
510168404Spjd		ASSERT(parent->avl_child[which_child] == NULL);
511168404Spjd		parent->avl_child[which_child] = node;
512168404Spjd	} else {
513168404Spjd		ASSERT(tree->avl_root == NULL);
514168404Spjd		tree->avl_root = node;
515168404Spjd	}
516168404Spjd	/*
517168404Spjd	 * Now, back up the tree modifying the balance of all nodes above the
518168404Spjd	 * insertion point. If we get to a highly unbalanced ancestor, we
519168404Spjd	 * need to do a rotation.  If we back out of the tree we are done.
520168404Spjd	 * If we brought any subtree into perfect balance (0), we are also done.
521168404Spjd	 */
522168404Spjd	for (;;) {
523168404Spjd		node = parent;
524168404Spjd		if (node == NULL)
525168404Spjd			return;
526168404Spjd
527168404Spjd		/*
528168404Spjd		 * Compute the new balance
529168404Spjd		 */
530168404Spjd		old_balance = AVL_XBALANCE(node);
531168404Spjd		new_balance = old_balance + avl_child2balance[which_child];
532168404Spjd
533168404Spjd		/*
534168404Spjd		 * If we introduced equal balance, then we are done immediately
535168404Spjd		 */
536168404Spjd		if (new_balance == 0) {
537168404Spjd			AVL_SETBALANCE(node, 0);
538168404Spjd			return;
539168404Spjd		}
540168404Spjd
541168404Spjd		/*
542168404Spjd		 * If both old and new are not zero we went
543168404Spjd		 * from -1 to -2 balance, do a rotation.
544168404Spjd		 */
545168404Spjd		if (old_balance != 0)
546168404Spjd			break;
547168404Spjd
548168404Spjd		AVL_SETBALANCE(node, new_balance);
549168404Spjd		parent = AVL_XPARENT(node);
550168404Spjd		which_child = AVL_XCHILD(node);
551168404Spjd	}
552168404Spjd
553168404Spjd	/*
554168404Spjd	 * perform a rotation to fix the tree and return
555168404Spjd	 */
556168404Spjd	(void) avl_rotation(tree, node, new_balance);
557168404Spjd}
558168404Spjd
559168404Spjd/*
560168404Spjd * Insert "new_data" in "tree" in the given "direction" either after or
561168404Spjd * before (AVL_AFTER, AVL_BEFORE) the data "here".
562168404Spjd *
563168404Spjd * Insertions can only be done at empty leaf points in the tree, therefore
564168404Spjd * if the given child of the node is already present we move to either
565168404Spjd * the AVL_PREV or AVL_NEXT and reverse the insertion direction. Since
566168404Spjd * every other node in the tree is a leaf, this always works.
567168404Spjd *
568168404Spjd * To help developers using this interface, we assert that the new node
569168404Spjd * is correctly ordered at every step of the way in DEBUG kernels.
570168404Spjd */
571168404Spjdvoid
572168404Spjdavl_insert_here(
573168404Spjd	avl_tree_t *tree,
574168404Spjd	void *new_data,
575168404Spjd	void *here,
576168404Spjd	int direction)
577168404Spjd{
578168404Spjd	avl_node_t *node;
579168404Spjd	int child = direction;	/* rely on AVL_BEFORE == 0, AVL_AFTER == 1 */
580168404Spjd#ifdef DEBUG
581168404Spjd	int diff;
582168404Spjd#endif
583168404Spjd
584168404Spjd	ASSERT(tree != NULL);
585168404Spjd	ASSERT(new_data != NULL);
586168404Spjd	ASSERT(here != NULL);
587168404Spjd	ASSERT(direction == AVL_BEFORE || direction == AVL_AFTER);
588168404Spjd
589168404Spjd	/*
590168404Spjd	 * If corresponding child of node is not NULL, go to the neighboring
591168404Spjd	 * node and reverse the insertion direction.
592168404Spjd	 */
593168404Spjd	node = AVL_DATA2NODE(here, tree->avl_offset);
594168404Spjd
595168404Spjd#ifdef DEBUG
596168404Spjd	diff = tree->avl_compar(new_data, here);
597168404Spjd	ASSERT(-1 <= diff && diff <= 1);
598168404Spjd	ASSERT(diff != 0);
599168404Spjd	ASSERT(diff > 0 ? child == 1 : child == 0);
600168404Spjd#endif
601168404Spjd
602168404Spjd	if (node->avl_child[child] != NULL) {
603168404Spjd		node = node->avl_child[child];
604168404Spjd		child = 1 - child;
605168404Spjd		while (node->avl_child[child] != NULL) {
606168404Spjd#ifdef DEBUG
607168404Spjd			diff = tree->avl_compar(new_data,
608168404Spjd			    AVL_NODE2DATA(node, tree->avl_offset));
609168404Spjd			ASSERT(-1 <= diff && diff <= 1);
610168404Spjd			ASSERT(diff != 0);
611168404Spjd			ASSERT(diff > 0 ? child == 1 : child == 0);
612168404Spjd#endif
613168404Spjd			node = node->avl_child[child];
614168404Spjd		}
615168404Spjd#ifdef DEBUG
616168404Spjd		diff = tree->avl_compar(new_data,
617168404Spjd		    AVL_NODE2DATA(node, tree->avl_offset));
618168404Spjd		ASSERT(-1 <= diff && diff <= 1);
619168404Spjd		ASSERT(diff != 0);
620168404Spjd		ASSERT(diff > 0 ? child == 1 : child == 0);
621168404Spjd#endif
622168404Spjd	}
623168404Spjd	ASSERT(node->avl_child[child] == NULL);
624168404Spjd
625168404Spjd	avl_insert(tree, new_data, AVL_MKINDEX(node, child));
626168404Spjd}
627168404Spjd
628168404Spjd/*
629168404Spjd * Add a new node to an AVL tree.
630168404Spjd */
631168404Spjdvoid
632168404Spjdavl_add(avl_tree_t *tree, void *new_node)
633168404Spjd{
634168404Spjd	avl_index_t where;
635168404Spjd
636168404Spjd	/*
637168404Spjd	 * This is unfortunate.  We want to call panic() here, even for
638168404Spjd	 * non-DEBUG kernels.  In userland, however, we can't depend on anything
639287703Sdelphij	 * in libc or else the rtld build process gets confused.
640287703Sdelphij	 * Thankfully, rtld provides us with its own assfail() so we can use
641287703Sdelphij	 * that here.  We use assfail() directly to get a nice error message
642287703Sdelphij	 * in the core - much like what panic() does for crashdumps.
643168404Spjd	 */
644168404Spjd	if (avl_find(tree, new_node, &where) != NULL)
645168404Spjd#ifdef _KERNEL
646168404Spjd		panic("avl_find() succeeded inside avl_add()");
647168404Spjd#else
648287703Sdelphij		(void) assfail("avl_find() succeeded inside avl_add()",
649287703Sdelphij		    __FILE__, __LINE__);
650168404Spjd#endif
651168404Spjd	avl_insert(tree, new_node, where);
652168404Spjd}
653168404Spjd
654168404Spjd/*
655168404Spjd * Delete a node from the AVL tree.  Deletion is similar to insertion, but
656168404Spjd * with 2 complications.
657168404Spjd *
658168404Spjd * First, we may be deleting an interior node. Consider the following subtree:
659168404Spjd *
660168404Spjd *     d           c            c
661168404Spjd *    / \         / \          / \
662168404Spjd *   b   e       b   e        b   e
663168404Spjd *  / \	        / \          /
664168404Spjd * a   c       a            a
665168404Spjd *
666168404Spjd * When we are deleting node (d), we find and bring up an adjacent valued leaf
667168404Spjd * node, say (c), to take the interior node's place. In the code this is
668168404Spjd * handled by temporarily swapping (d) and (c) in the tree and then using
669168404Spjd * common code to delete (d) from the leaf position.
670168404Spjd *
671168404Spjd * Secondly, an interior deletion from a deep tree may require more than one
672168404Spjd * rotation to fix the balance. This is handled by moving up the tree through
673168404Spjd * parents and applying rotations as needed. The return value from
674168404Spjd * avl_rotation() is used to detect when a subtree did not change overall
675168404Spjd * height due to a rotation.
676168404Spjd */
677168404Spjdvoid
678168404Spjdavl_remove(avl_tree_t *tree, void *data)
679168404Spjd{
680168404Spjd	avl_node_t *delete;
681168404Spjd	avl_node_t *parent;
682168404Spjd	avl_node_t *node;
683168404Spjd	avl_node_t tmp;
684168404Spjd	int old_balance;
685168404Spjd	int new_balance;
686168404Spjd	int left;
687168404Spjd	int right;
688168404Spjd	int which_child;
689168404Spjd	size_t off = tree->avl_offset;
690168404Spjd
691168404Spjd	ASSERT(tree);
692168404Spjd
693168404Spjd	delete = AVL_DATA2NODE(data, off);
694168404Spjd
695168404Spjd	/*
696168404Spjd	 * Deletion is easiest with a node that has at most 1 child.
697168404Spjd	 * We swap a node with 2 children with a sequentially valued
698168404Spjd	 * neighbor node. That node will have at most 1 child. Note this
699168404Spjd	 * has no effect on the ordering of the remaining nodes.
700168404Spjd	 *
701168404Spjd	 * As an optimization, we choose the greater neighbor if the tree
702168404Spjd	 * is right heavy, otherwise the left neighbor. This reduces the
703168404Spjd	 * number of rotations needed.
704168404Spjd	 */
705168404Spjd	if (delete->avl_child[0] != NULL && delete->avl_child[1] != NULL) {
706168404Spjd
707168404Spjd		/*
708168404Spjd		 * choose node to swap from whichever side is taller
709168404Spjd		 */
710168404Spjd		old_balance = AVL_XBALANCE(delete);
711168404Spjd		left = avl_balance2child[old_balance + 1];
712168404Spjd		right = 1 - left;
713168404Spjd
714168404Spjd		/*
715168404Spjd		 * get to the previous value'd node
716168404Spjd		 * (down 1 left, as far as possible right)
717168404Spjd		 */
718168404Spjd		for (node = delete->avl_child[left];
719168404Spjd		    node->avl_child[right] != NULL;
720168404Spjd		    node = node->avl_child[right])
721168404Spjd			;
722168404Spjd
723168404Spjd		/*
724168404Spjd		 * create a temp placeholder for 'node'
725168404Spjd		 * move 'node' to delete's spot in the tree
726168404Spjd		 */
727168404Spjd		tmp = *node;
728168404Spjd
729168404Spjd		*node = *delete;
730168404Spjd		if (node->avl_child[left] == node)
731168404Spjd			node->avl_child[left] = &tmp;
732168404Spjd
733168404Spjd		parent = AVL_XPARENT(node);
734168404Spjd		if (parent != NULL)
735168404Spjd			parent->avl_child[AVL_XCHILD(node)] = node;
736168404Spjd		else
737168404Spjd			tree->avl_root = node;
738168404Spjd		AVL_SETPARENT(node->avl_child[left], node);
739168404Spjd		AVL_SETPARENT(node->avl_child[right], node);
740168404Spjd
741168404Spjd		/*
742168404Spjd		 * Put tmp where node used to be (just temporary).
743168404Spjd		 * It always has a parent and at most 1 child.
744168404Spjd		 */
745168404Spjd		delete = &tmp;
746168404Spjd		parent = AVL_XPARENT(delete);
747168404Spjd		parent->avl_child[AVL_XCHILD(delete)] = delete;
748168404Spjd		which_child = (delete->avl_child[1] != 0);
749168404Spjd		if (delete->avl_child[which_child] != NULL)
750168404Spjd			AVL_SETPARENT(delete->avl_child[which_child], delete);
751168404Spjd	}
752168404Spjd
753168404Spjd
754168404Spjd	/*
755168404Spjd	 * Here we know "delete" is at least partially a leaf node. It can
756168404Spjd	 * be easily removed from the tree.
757168404Spjd	 */
758168404Spjd	ASSERT(tree->avl_numnodes > 0);
759168404Spjd	--tree->avl_numnodes;
760168404Spjd	parent = AVL_XPARENT(delete);
761168404Spjd	which_child = AVL_XCHILD(delete);
762168404Spjd	if (delete->avl_child[0] != NULL)
763168404Spjd		node = delete->avl_child[0];
764168404Spjd	else
765168404Spjd		node = delete->avl_child[1];
766168404Spjd
767168404Spjd	/*
768168404Spjd	 * Connect parent directly to node (leaving out delete).
769168404Spjd	 */
770168404Spjd	if (node != NULL) {
771168404Spjd		AVL_SETPARENT(node, parent);
772168404Spjd		AVL_SETCHILD(node, which_child);
773168404Spjd	}
774168404Spjd	if (parent == NULL) {
775168404Spjd		tree->avl_root = node;
776168404Spjd		return;
777168404Spjd	}
778168404Spjd	parent->avl_child[which_child] = node;
779168404Spjd
780168404Spjd
781168404Spjd	/*
782168404Spjd	 * Since the subtree is now shorter, begin adjusting parent balances
783168404Spjd	 * and performing any needed rotations.
784168404Spjd	 */
785168404Spjd	do {
786168404Spjd
787168404Spjd		/*
788168404Spjd		 * Move up the tree and adjust the balance
789168404Spjd		 *
790168404Spjd		 * Capture the parent and which_child values for the next
791168404Spjd		 * iteration before any rotations occur.
792168404Spjd		 */
793168404Spjd		node = parent;
794168404Spjd		old_balance = AVL_XBALANCE(node);
795168404Spjd		new_balance = old_balance - avl_child2balance[which_child];
796168404Spjd		parent = AVL_XPARENT(node);
797168404Spjd		which_child = AVL_XCHILD(node);
798168404Spjd
799168404Spjd		/*
800168404Spjd		 * If a node was in perfect balance but isn't anymore then
801168404Spjd		 * we can stop, since the height didn't change above this point
802168404Spjd		 * due to a deletion.
803168404Spjd		 */
804168404Spjd		if (old_balance == 0) {
805168404Spjd			AVL_SETBALANCE(node, new_balance);
806168404Spjd			break;
807168404Spjd		}
808168404Spjd
809168404Spjd		/*
810168404Spjd		 * If the new balance is zero, we don't need to rotate
811168404Spjd		 * else
812168404Spjd		 * need a rotation to fix the balance.
813168404Spjd		 * If the rotation doesn't change the height
814168404Spjd		 * of the sub-tree we have finished adjusting.
815168404Spjd		 */
816168404Spjd		if (new_balance == 0)
817168404Spjd			AVL_SETBALANCE(node, new_balance);
818168404Spjd		else if (!avl_rotation(tree, node, new_balance))
819168404Spjd			break;
820168404Spjd	} while (parent != NULL);
821168404Spjd}
822168404Spjd
823185029Spjd#define	AVL_REINSERT(tree, obj)		\
824185029Spjd	avl_remove((tree), (obj));	\
825185029Spjd	avl_add((tree), (obj))
826185029Spjd
827185029Spjdboolean_t
828185029Spjdavl_update_lt(avl_tree_t *t, void *obj)
829185029Spjd{
830185029Spjd	void *neighbor;
831185029Spjd
832185029Spjd	ASSERT(((neighbor = AVL_NEXT(t, obj)) == NULL) ||
833185029Spjd	    (t->avl_compar(obj, neighbor) <= 0));
834185029Spjd
835185029Spjd	neighbor = AVL_PREV(t, obj);
836185029Spjd	if ((neighbor != NULL) && (t->avl_compar(obj, neighbor) < 0)) {
837185029Spjd		AVL_REINSERT(t, obj);
838185029Spjd		return (B_TRUE);
839185029Spjd	}
840185029Spjd
841185029Spjd	return (B_FALSE);
842185029Spjd}
843185029Spjd
844185029Spjdboolean_t
845185029Spjdavl_update_gt(avl_tree_t *t, void *obj)
846185029Spjd{
847185029Spjd	void *neighbor;
848185029Spjd
849185029Spjd	ASSERT(((neighbor = AVL_PREV(t, obj)) == NULL) ||
850185029Spjd	    (t->avl_compar(obj, neighbor) >= 0));
851185029Spjd
852185029Spjd	neighbor = AVL_NEXT(t, obj);
853185029Spjd	if ((neighbor != NULL) && (t->avl_compar(obj, neighbor) > 0)) {
854185029Spjd		AVL_REINSERT(t, obj);
855185029Spjd		return (B_TRUE);
856185029Spjd	}
857185029Spjd
858185029Spjd	return (B_FALSE);
859185029Spjd}
860185029Spjd
861185029Spjdboolean_t
862185029Spjdavl_update(avl_tree_t *t, void *obj)
863185029Spjd{
864185029Spjd	void *neighbor;
865185029Spjd
866185029Spjd	neighbor = AVL_PREV(t, obj);
867185029Spjd	if ((neighbor != NULL) && (t->avl_compar(obj, neighbor) < 0)) {
868185029Spjd		AVL_REINSERT(t, obj);
869185029Spjd		return (B_TRUE);
870185029Spjd	}
871185029Spjd
872185029Spjd	neighbor = AVL_NEXT(t, obj);
873185029Spjd	if ((neighbor != NULL) && (t->avl_compar(obj, neighbor) > 0)) {
874185029Spjd		AVL_REINSERT(t, obj);
875185029Spjd		return (B_TRUE);
876185029Spjd	}
877185029Spjd
878185029Spjd	return (B_FALSE);
879185029Spjd}
880185029Spjd
881269229Sdelphijvoid
882269229Sdelphijavl_swap(avl_tree_t *tree1, avl_tree_t *tree2)
883269229Sdelphij{
884269229Sdelphij	avl_node_t *temp_node;
885269229Sdelphij	ulong_t temp_numnodes;
886269229Sdelphij
887269229Sdelphij	ASSERT3P(tree1->avl_compar, ==, tree2->avl_compar);
888269229Sdelphij	ASSERT3U(tree1->avl_offset, ==, tree2->avl_offset);
889269229Sdelphij	ASSERT3U(tree1->avl_size, ==, tree2->avl_size);
890269229Sdelphij
891269229Sdelphij	temp_node = tree1->avl_root;
892269229Sdelphij	temp_numnodes = tree1->avl_numnodes;
893269229Sdelphij	tree1->avl_root = tree2->avl_root;
894269229Sdelphij	tree1->avl_numnodes = tree2->avl_numnodes;
895269229Sdelphij	tree2->avl_root = temp_node;
896269229Sdelphij	tree2->avl_numnodes = temp_numnodes;
897269229Sdelphij}
898269229Sdelphij
899168404Spjd/*
900168404Spjd * initialize a new AVL tree
901168404Spjd */
902168404Spjdvoid
903168404Spjdavl_create(avl_tree_t *tree, int (*compar) (const void *, const void *),
904168404Spjd    size_t size, size_t offset)
905168404Spjd{
906168404Spjd	ASSERT(tree);
907168404Spjd	ASSERT(compar);
908168404Spjd	ASSERT(size > 0);
909168404Spjd	ASSERT(size >= offset + sizeof (avl_node_t));
910168404Spjd#ifdef _LP64
911168404Spjd	ASSERT((offset & 0x7) == 0);
912168404Spjd#endif
913168404Spjd
914168404Spjd	tree->avl_compar = compar;
915168404Spjd	tree->avl_root = NULL;
916168404Spjd	tree->avl_numnodes = 0;
917168404Spjd	tree->avl_size = size;
918168404Spjd	tree->avl_offset = offset;
919168404Spjd}
920168404Spjd
921168404Spjd/*
922168404Spjd * Delete a tree.
923168404Spjd */
924168404Spjd/* ARGSUSED */
925168404Spjdvoid
926168404Spjdavl_destroy(avl_tree_t *tree)
927168404Spjd{
928168404Spjd	ASSERT(tree);
929168404Spjd	ASSERT(tree->avl_numnodes == 0);
930168404Spjd	ASSERT(tree->avl_root == NULL);
931168404Spjd}
932168404Spjd
933168404Spjd
934168404Spjd/*
935168404Spjd * Return the number of nodes in an AVL tree.
936168404Spjd */
937168404Spjdulong_t
938168404Spjdavl_numnodes(avl_tree_t *tree)
939168404Spjd{
940168404Spjd	ASSERT(tree);
941168404Spjd	return (tree->avl_numnodes);
942168404Spjd}
943168404Spjd
944185029Spjdboolean_t
945185029Spjdavl_is_empty(avl_tree_t *tree)
946185029Spjd{
947185029Spjd	ASSERT(tree);
948185029Spjd	return (tree->avl_numnodes == 0);
949185029Spjd}
950168404Spjd
951168404Spjd#define	CHILDBIT	(1L)
952168404Spjd
953168404Spjd/*
954168404Spjd * Post-order tree walk used to visit all tree nodes and destroy the tree
955264836Sdelphij * in post order. This is used for destroying a tree without paying any cost
956168404Spjd * for rebalancing it.
957168404Spjd *
958168404Spjd * example:
959168404Spjd *
960168404Spjd *	void *cookie = NULL;
961168404Spjd *	my_data_t *node;
962168404Spjd *
963168404Spjd *	while ((node = avl_destroy_nodes(tree, &cookie)) != NULL)
964168404Spjd *		free(node);
965168404Spjd *	avl_destroy(tree);
966168404Spjd *
967168404Spjd * The cookie is really an avl_node_t to the current node's parent and
968168404Spjd * an indication of which child you looked at last.
969168404Spjd *
970168404Spjd * On input, a cookie value of CHILDBIT indicates the tree is done.
971168404Spjd */
972168404Spjdvoid *
973168404Spjdavl_destroy_nodes(avl_tree_t *tree, void **cookie)
974168404Spjd{
975168404Spjd	avl_node_t	*node;
976168404Spjd	avl_node_t	*parent;
977168404Spjd	int		child;
978168404Spjd	void		*first;
979168404Spjd	size_t		off = tree->avl_offset;
980168404Spjd
981168404Spjd	/*
982168404Spjd	 * Initial calls go to the first node or it's right descendant.
983168404Spjd	 */
984168404Spjd	if (*cookie == NULL) {
985168404Spjd		first = avl_first(tree);
986168404Spjd
987168404Spjd		/*
988168404Spjd		 * deal with an empty tree
989168404Spjd		 */
990168404Spjd		if (first == NULL) {
991168404Spjd			*cookie = (void *)CHILDBIT;
992168404Spjd			return (NULL);
993168404Spjd		}
994168404Spjd
995168404Spjd		node = AVL_DATA2NODE(first, off);
996168404Spjd		parent = AVL_XPARENT(node);
997168404Spjd		goto check_right_side;
998168404Spjd	}
999168404Spjd
1000168404Spjd	/*
1001168404Spjd	 * If there is no parent to return to we are done.
1002168404Spjd	 */
1003168404Spjd	parent = (avl_node_t *)((uintptr_t)(*cookie) & ~CHILDBIT);
1004168404Spjd	if (parent == NULL) {
1005168404Spjd		if (tree->avl_root != NULL) {
1006168404Spjd			ASSERT(tree->avl_numnodes == 1);
1007168404Spjd			tree->avl_root = NULL;
1008168404Spjd			tree->avl_numnodes = 0;
1009168404Spjd		}
1010168404Spjd		return (NULL);
1011168404Spjd	}
1012168404Spjd
1013168404Spjd	/*
1014168404Spjd	 * Remove the child pointer we just visited from the parent and tree.
1015168404Spjd	 */
1016168404Spjd	child = (uintptr_t)(*cookie) & CHILDBIT;
1017168404Spjd	parent->avl_child[child] = NULL;
1018168404Spjd	ASSERT(tree->avl_numnodes > 1);
1019168404Spjd	--tree->avl_numnodes;
1020168404Spjd
1021168404Spjd	/*
1022168404Spjd	 * If we just did a right child or there isn't one, go up to parent.
1023168404Spjd	 */
1024168404Spjd	if (child == 1 || parent->avl_child[1] == NULL) {
1025168404Spjd		node = parent;
1026168404Spjd		parent = AVL_XPARENT(parent);
1027168404Spjd		goto done;
1028168404Spjd	}
1029168404Spjd
1030168404Spjd	/*
1031168404Spjd	 * Do parent's right child, then leftmost descendent.
1032168404Spjd	 */
1033168404Spjd	node = parent->avl_child[1];
1034168404Spjd	while (node->avl_child[0] != NULL) {
1035168404Spjd		parent = node;
1036168404Spjd		node = node->avl_child[0];
1037168404Spjd	}
1038168404Spjd
1039168404Spjd	/*
1040168404Spjd	 * If here, we moved to a left child. It may have one
1041168404Spjd	 * child on the right (when balance == +1).
1042168404Spjd	 */
1043168404Spjdcheck_right_side:
1044168404Spjd	if (node->avl_child[1] != NULL) {
1045168404Spjd		ASSERT(AVL_XBALANCE(node) == 1);
1046168404Spjd		parent = node;
1047168404Spjd		node = node->avl_child[1];
1048168404Spjd		ASSERT(node->avl_child[0] == NULL &&
1049168404Spjd		    node->avl_child[1] == NULL);
1050168404Spjd	} else {
1051168404Spjd		ASSERT(AVL_XBALANCE(node) <= 0);
1052168404Spjd	}
1053168404Spjd
1054168404Spjddone:
1055168404Spjd	if (parent == NULL) {
1056168404Spjd		*cookie = (void *)CHILDBIT;
1057168404Spjd		ASSERT(node == tree->avl_root);
1058168404Spjd	} else {
1059168404Spjd		*cookie = (void *)((uintptr_t)parent | AVL_XCHILD(node));
1060168404Spjd	}
1061168404Spjd
1062168404Spjd	return (AVL_NODE2DATA(node, off));
1063168404Spjd}
1064