1/*
2 * CDDL HEADER START
3 *
4 * The contents of this file are subject to the terms of the
5 * Common Development and Distribution License (the "License").
6 * You may not use this file except in compliance with the License.
7 *
8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9 * or http://www.opensolaris.org/os/licensing.
10 * See the License for the specific language governing permissions
11 * and limitations under the License.
12 *
13 * When distributing Covered Code, include this CDDL HEADER in each
14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15 * If applicable, add the following below this CDDL HEADER, with the
16 * fields enclosed by brackets "[]" replaced with your own identifying
17 * information: Portions Copyright [yyyy] [name of copyright owner]
18 *
19 * CDDL HEADER END
20 */
21/*
22 * Copyright 2009 Sun Microsystems, Inc.  All rights reserved.
23 * Use is subject to license terms.
24 */
25
26/*
27 * Copyright (c) 2014 by Delphix. All rights reserved.
28 * Copyright 2015 Nexenta Systems, Inc.  All rights reserved.
29 */
30
31/*
32 * AVL - generic AVL tree implementation for kernel use
33 *
34 * A complete description of AVL trees can be found in many CS textbooks.
35 *
36 * Here is a very brief overview. An AVL tree is a binary search tree that is
37 * almost perfectly balanced. By "almost" perfectly balanced, we mean that at
38 * any given node, the left and right subtrees are allowed to differ in height
39 * by at most 1 level.
40 *
41 * This relaxation from a perfectly balanced binary tree allows doing
42 * insertion and deletion relatively efficiently. Searching the tree is
43 * still a fast operation, roughly O(log(N)).
44 *
45 * The key to insertion and deletion is a set of tree manipulations called
46 * rotations, which bring unbalanced subtrees back into the semi-balanced state.
47 *
48 * This implementation of AVL trees has the following peculiarities:
49 *
50 *	- The AVL specific data structures are physically embedded as fields
51 *	  in the "using" data structures.  To maintain generality the code
52 *	  must constantly translate between "avl_node_t *" and containing
53 *	  data structure "void *"s by adding/subtracting the avl_offset.
54 *
55 *	- Since the AVL data is always embedded in other structures, there is
56 *	  no locking or memory allocation in the AVL routines. This must be
57 *	  provided for by the enclosing data structure's semantics. Typically,
58 *	  avl_insert()/_add()/_remove()/avl_insert_here() require some kind of
59 *	  exclusive write lock. Other operations require a read lock.
60 *
61 *      - The implementation uses iteration instead of explicit recursion,
62 *	  since it is intended to run on limited size kernel stacks. Since
63 *	  there is no recursion stack present to move "up" in the tree,
64 *	  there is an explicit "parent" link in the avl_node_t.
65 *
66 *      - The left/right children pointers of a node are in an array.
67 *	  In the code, variables (instead of constants) are used to represent
68 *	  left and right indices.  The implementation is written as if it only
69 *	  dealt with left handed manipulations.  By changing the value assigned
70 *	  to "left", the code also works for right handed trees.  The
71 *	  following variables/terms are frequently used:
72 *
73 *		int left;	// 0 when dealing with left children,
74 *				// 1 for dealing with right children
75 *
76 *		int left_heavy;	// -1 when left subtree is taller at some node,
77 *				// +1 when right subtree is taller
78 *
79 *		int right;	// will be the opposite of left (0 or 1)
80 *		int right_heavy;// will be the opposite of left_heavy (-1 or 1)
81 *
82 *		int direction;  // 0 for "<" (ie. left child); 1 for ">" (right)
83 *
84 *	  Though it is a little more confusing to read the code, the approach
85 *	  allows using half as much code (and hence cache footprint) for tree
86 *	  manipulations and eliminates many conditional branches.
87 *
88 *	- The avl_index_t is an opaque "cookie" used to find nodes at or
89 *	  adjacent to where a new value would be inserted in the tree. The value
 *	  is a modified "avl_node_t *".  The bottom bit (normally 0 for a
 *	  pointer) is set to indicate that the new node has a value greater
 *	  than the value of the indicated "avl_node_t *".
93 *
94 * Note - in addition to userland (e.g. libavl and libutil) and the kernel
95 * (e.g. genunix), avl.c is compiled into ld.so and kmdb's genunix module,
96 * which each have their own compilation environments and subsequent
97 * requirements. Each of these environments must be considered when adding
98 * dependencies from avl.c.
99 */
100
101#include <sys/types.h>
102#include <sys/param.h>
103#include <sys/stdint.h>
104#include <sys/debug.h>
105#include <sys/avl.h>
106
107/*
108 * Small arrays to translate between balance (or diff) values and child indices.
109 *
110 * Code that deals with binary tree data structures will randomly use
111 * left and right children when examining a tree.  C "if()" statements
112 * which evaluate randomly suffer from very poor hardware branch prediction.
113 * In this code we avoid some of the branch mispredictions by using the
114 * following translation arrays. They replace random branches with an
115 * additional memory reference. Since the translation arrays are both very
116 * small the data should remain efficiently in cache.
117 */
/* child index (0 = left, 1 = right) -> balance contribution (-1 / +1) */
static const int avl_child2balance[2] = {
	-1, 1
};

/* (1 + diff), with diff in -1..1 -> child index; diff 0 maps to left */
static const int avl_balance2child[3] = {
	0, 0, 1
};
120
121
122/*
123 * Walk from one node to the previous valued node (ie. an infix walk
124 * towards the left). At any given node we do one of 2 things:
125 *
126 * - If there is a left child, go to it, then to it's rightmost descendant.
127 *
128 * - otherwise we return through parent nodes until we've come from a right
129 *   child.
130 *
131 * Return Value:
132 * NULL - if at the end of the nodes
133 * otherwise next node
134 */
135void *
136avl_walk(avl_tree_t *tree, void	*oldnode, int left)
137{
138	size_t off = tree->avl_offset;
139	avl_node_t *node = AVL_DATA2NODE(oldnode, off);
140	int right = 1 - left;
141	int was_child;
142
143
144	/*
145	 * nowhere to walk to if tree is empty
146	 */
147	if (node == NULL)
148		return (NULL);
149
150	/*
151	 * Visit the previous valued node. There are two possibilities:
152	 *
153	 * If this node has a left child, go down one left, then all
154	 * the way right.
155	 */
156	if (node->avl_child[left] != NULL) {
157		for (node = node->avl_child[left];
158		    node->avl_child[right] != NULL;
159		    node = node->avl_child[right])
160			;
161	/*
162	 * Otherwise, return thru left children as far as we can.
163	 */
164	} else {
165		for (;;) {
166			was_child = AVL_XCHILD(node);
167			node = AVL_XPARENT(node);
168			if (node == NULL)
169				return (NULL);
170			if (was_child == right)
171				break;
172		}
173	}
174
175	return (AVL_NODE2DATA(node, off));
176}
177
178/*
179 * Return the lowest valued node in a tree or NULL.
180 * (leftmost child from root of tree)
181 */
182void *
183avl_first(avl_tree_t *tree)
184{
185	avl_node_t *node;
186	avl_node_t *prev = NULL;
187	size_t off = tree->avl_offset;
188
189	for (node = tree->avl_root; node != NULL; node = node->avl_child[0])
190		prev = node;
191
192	if (prev != NULL)
193		return (AVL_NODE2DATA(prev, off));
194	return (NULL);
195}
196
197/*
198 * Return the highest valued node in a tree or NULL.
199 * (rightmost child from root of tree)
200 */
201void *
202avl_last(avl_tree_t *tree)
203{
204	avl_node_t *node;
205	avl_node_t *prev = NULL;
206	size_t off = tree->avl_offset;
207
208	for (node = tree->avl_root; node != NULL; node = node->avl_child[1])
209		prev = node;
210
211	if (prev != NULL)
212		return (AVL_NODE2DATA(prev, off));
213	return (NULL);
214}
215
216/*
217 * Access the node immediately before or after an insertion point.
218 *
219 * "avl_index_t" is a (avl_node_t *) with the bottom bit indicating a child
220 *
221 * Return value:
222 *	NULL: no node in the given direction
223 *	"void *"  of the found tree node
224 */
225void *
226avl_nearest(avl_tree_t *tree, avl_index_t where, int direction)
227{
228	int child = AVL_INDEX2CHILD(where);
229	avl_node_t *node = AVL_INDEX2NODE(where);
230	void *data;
231	size_t off = tree->avl_offset;
232
233	if (node == NULL) {
234		ASSERT(tree->avl_root == NULL);
235		return (NULL);
236	}
237	data = AVL_NODE2DATA(node, off);
238	if (child != direction)
239		return (data);
240
241	return (avl_walk(tree, data, direction));
242}
243
244
245/*
246 * Search for the node which contains "value".  The algorithm is a
247 * simple binary tree search.
248 *
249 * return value:
250 *	NULL: the value is not in the AVL tree
251 *		*where (if not NULL)  is set to indicate the insertion point
252 *	"void *"  of the found tree node
253 */
254void *
255avl_find(avl_tree_t *tree, const void *value, avl_index_t *where)
256{
257	avl_node_t *node;
258	avl_node_t *prev = NULL;
259	int child = 0;
260	int diff;
261	size_t off = tree->avl_offset;
262
263	for (node = tree->avl_root; node != NULL;
264	    node = node->avl_child[child]) {
265
266		prev = node;
267
268		diff = tree->avl_compar(value, AVL_NODE2DATA(node, off));
269		ASSERT(-1 <= diff && diff <= 1);
270		if (diff == 0) {
271#ifdef DEBUG
272			if (where != NULL)
273				*where = 0;
274#endif
275			return (AVL_NODE2DATA(node, off));
276		}
277		child = avl_balance2child[1 + diff];
278
279	}
280
281	if (where != NULL)
282		*where = AVL_MKINDEX(prev, child);
283
284	return (NULL);
285}
286
287
288/*
289 * Perform a rotation to restore balance at the subtree given by depth.
290 *
291 * This routine is used by both insertion and deletion. The return value
292 * indicates:
293 *	 0 : subtree did not change height
294 *	!0 : subtree was reduced in height
295 *
296 * The code is written as if handling left rotations, right rotations are
297 * symmetric and handled by swapping values of variables right/left[_heavy]
298 *
299 * On input balance is the "new" balance at "node". This value is either
300 * -2 or +2.
301 */
302static int
303avl_rotation(avl_tree_t *tree, avl_node_t *node, int balance)
304{
305	int left = !(balance < 0);	/* when balance = -2, left will be 0 */
306	int right = 1 - left;
307	int left_heavy = balance >> 1;
308	int right_heavy = -left_heavy;
309	avl_node_t *parent = AVL_XPARENT(node);
310	avl_node_t *child = node->avl_child[left];
311	avl_node_t *cright;
312	avl_node_t *gchild;
313	avl_node_t *gright;
314	avl_node_t *gleft;
315	int which_child = AVL_XCHILD(node);
316	int child_bal = AVL_XBALANCE(child);
317
318	/* BEGIN CSTYLED */
319	/*
320	 * case 1 : node is overly left heavy, the left child is balanced or
321	 * also left heavy. This requires the following rotation.
322	 *
323	 *                   (node bal:-2)
324	 *                    /           \
325	 *                   /             \
326	 *              (child bal:0 or -1)
327	 *              /    \
328	 *             /      \
329	 *                     cright
330	 *
331	 * becomes:
332	 *
333	 *              (child bal:1 or 0)
334	 *              /        \
335	 *             /          \
336	 *                        (node bal:-1 or 0)
337	 *                         /     \
338	 *                        /       \
339	 *                     cright
340	 *
341	 * we detect this situation by noting that child's balance is not
342	 * right_heavy.
343	 */
344	/* END CSTYLED */
345	if (child_bal != right_heavy) {
346
347		/*
348		 * compute new balance of nodes
349		 *
350		 * If child used to be left heavy (now balanced) we reduced
351		 * the height of this sub-tree -- used in "return...;" below
352		 */
353		child_bal += right_heavy; /* adjust towards right */
354
355		/*
356		 * move "cright" to be node's left child
357		 */
358		cright = child->avl_child[right];
359		node->avl_child[left] = cright;
360		if (cright != NULL) {
361			AVL_SETPARENT(cright, node);
362			AVL_SETCHILD(cright, left);
363		}
364
365		/*
366		 * move node to be child's right child
367		 */
368		child->avl_child[right] = node;
369		AVL_SETBALANCE(node, -child_bal);
370		AVL_SETCHILD(node, right);
371		AVL_SETPARENT(node, child);
372
373		/*
374		 * update the pointer into this subtree
375		 */
376		AVL_SETBALANCE(child, child_bal);
377		AVL_SETCHILD(child, which_child);
378		AVL_SETPARENT(child, parent);
379		if (parent != NULL)
380			parent->avl_child[which_child] = child;
381		else
382			tree->avl_root = child;
383
384		return (child_bal == 0);
385	}
386
387	/* BEGIN CSTYLED */
388	/*
389	 * case 2 : When node is left heavy, but child is right heavy we use
390	 * a different rotation.
391	 *
392	 *                   (node b:-2)
393	 *                    /   \
394	 *                   /     \
395	 *                  /       \
396	 *             (child b:+1)
397	 *              /     \
398	 *             /       \
399	 *                   (gchild b: != 0)
400	 *                     /  \
401	 *                    /    \
402	 *                 gleft   gright
403	 *
404	 * becomes:
405	 *
406	 *              (gchild b:0)
407	 *              /       \
408	 *             /         \
409	 *            /           \
410	 *        (child b:?)   (node b:?)
411	 *         /  \          /   \
412	 *        /    \        /     \
413	 *            gleft   gright
414	 *
415	 * computing the new balances is more complicated. As an example:
416	 *	 if gchild was right_heavy, then child is now left heavy
417	 *		else it is balanced
418	 */
419	/* END CSTYLED */
420	gchild = child->avl_child[right];
421	gleft = gchild->avl_child[left];
422	gright = gchild->avl_child[right];
423
424	/*
425	 * move gright to left child of node and
426	 *
427	 * move gleft to right child of node
428	 */
429	node->avl_child[left] = gright;
430	if (gright != NULL) {
431		AVL_SETPARENT(gright, node);
432		AVL_SETCHILD(gright, left);
433	}
434
435	child->avl_child[right] = gleft;
436	if (gleft != NULL) {
437		AVL_SETPARENT(gleft, child);
438		AVL_SETCHILD(gleft, right);
439	}
440
441	/*
442	 * move child to left child of gchild and
443	 *
444	 * move node to right child of gchild and
445	 *
446	 * fixup parent of all this to point to gchild
447	 */
448	balance = AVL_XBALANCE(gchild);
449	gchild->avl_child[left] = child;
450	AVL_SETBALANCE(child, (balance == right_heavy ? left_heavy : 0));
451	AVL_SETPARENT(child, gchild);
452	AVL_SETCHILD(child, left);
453
454	gchild->avl_child[right] = node;
455	AVL_SETBALANCE(node, (balance == left_heavy ? right_heavy : 0));
456	AVL_SETPARENT(node, gchild);
457	AVL_SETCHILD(node, right);
458
459	AVL_SETBALANCE(gchild, 0);
460	AVL_SETPARENT(gchild, parent);
461	AVL_SETCHILD(gchild, which_child);
462	if (parent != NULL)
463		parent->avl_child[which_child] = gchild;
464	else
465		tree->avl_root = gchild;
466
467	return (1);	/* the new tree is always shorter */
468}
469
470
471/*
472 * Insert a new node into an AVL tree at the specified (from avl_find()) place.
473 *
474 * Newly inserted nodes are always leaf nodes in the tree, since avl_find()
475 * searches out to the leaf positions.  The avl_index_t indicates the node
476 * which will be the parent of the new node.
477 *
478 * After the node is inserted, a single rotation further up the tree may
479 * be necessary to maintain an acceptable AVL balance.
480 */
481void
482avl_insert(avl_tree_t *tree, void *new_data, avl_index_t where)
483{
484	avl_node_t *node;
485	avl_node_t *parent = AVL_INDEX2NODE(where);
486	int old_balance;
487	int new_balance;
488	int which_child = AVL_INDEX2CHILD(where);
489	size_t off = tree->avl_offset;
490
491	ASSERT(tree);
492#ifdef _LP64
493	ASSERT(((uintptr_t)new_data & 0x7) == 0);
494#endif
495
496	node = AVL_DATA2NODE(new_data, off);
497
498	/*
499	 * First, add the node to the tree at the indicated position.
500	 */
501	++tree->avl_numnodes;
502
503	node->avl_child[0] = NULL;
504	node->avl_child[1] = NULL;
505
506	AVL_SETCHILD(node, which_child);
507	AVL_SETBALANCE(node, 0);
508	AVL_SETPARENT(node, parent);
509	if (parent != NULL) {
510		ASSERT(parent->avl_child[which_child] == NULL);
511		parent->avl_child[which_child] = node;
512	} else {
513		ASSERT(tree->avl_root == NULL);
514		tree->avl_root = node;
515	}
516	/*
517	 * Now, back up the tree modifying the balance of all nodes above the
518	 * insertion point. If we get to a highly unbalanced ancestor, we
519	 * need to do a rotation.  If we back out of the tree we are done.
520	 * If we brought any subtree into perfect balance (0), we are also done.
521	 */
522	for (;;) {
523		node = parent;
524		if (node == NULL)
525			return;
526
527		/*
528		 * Compute the new balance
529		 */
530		old_balance = AVL_XBALANCE(node);
531		new_balance = old_balance + avl_child2balance[which_child];
532
533		/*
534		 * If we introduced equal balance, then we are done immediately
535		 */
536		if (new_balance == 0) {
537			AVL_SETBALANCE(node, 0);
538			return;
539		}
540
541		/*
542		 * If both old and new are not zero we went
543		 * from -1 to -2 balance, do a rotation.
544		 */
545		if (old_balance != 0)
546			break;
547
548		AVL_SETBALANCE(node, new_balance);
549		parent = AVL_XPARENT(node);
550		which_child = AVL_XCHILD(node);
551	}
552
553	/*
554	 * perform a rotation to fix the tree and return
555	 */
556	(void) avl_rotation(tree, node, new_balance);
557}
558
559/*
560 * Insert "new_data" in "tree" in the given "direction" either after or
561 * before (AVL_AFTER, AVL_BEFORE) the data "here".
562 *
563 * Insertions can only be done at empty leaf points in the tree, therefore
564 * if the given child of the node is already present we move to either
565 * the AVL_PREV or AVL_NEXT and reverse the insertion direction. Since
566 * every other node in the tree is a leaf, this always works.
567 *
568 * To help developers using this interface, we assert that the new node
569 * is correctly ordered at every step of the way in DEBUG kernels.
570 */
571void
572avl_insert_here(
573	avl_tree_t *tree,
574	void *new_data,
575	void *here,
576	int direction)
577{
578	avl_node_t *node;
579	int child = direction;	/* rely on AVL_BEFORE == 0, AVL_AFTER == 1 */
580#ifdef DEBUG
581	int diff;
582#endif
583
584	ASSERT(tree != NULL);
585	ASSERT(new_data != NULL);
586	ASSERT(here != NULL);
587	ASSERT(direction == AVL_BEFORE || direction == AVL_AFTER);
588
589	/*
590	 * If corresponding child of node is not NULL, go to the neighboring
591	 * node and reverse the insertion direction.
592	 */
593	node = AVL_DATA2NODE(here, tree->avl_offset);
594
595#ifdef DEBUG
596	diff = tree->avl_compar(new_data, here);
597	ASSERT(-1 <= diff && diff <= 1);
598	ASSERT(diff != 0);
599	ASSERT(diff > 0 ? child == 1 : child == 0);
600#endif
601
602	if (node->avl_child[child] != NULL) {
603		node = node->avl_child[child];
604		child = 1 - child;
605		while (node->avl_child[child] != NULL) {
606#ifdef DEBUG
607			diff = tree->avl_compar(new_data,
608			    AVL_NODE2DATA(node, tree->avl_offset));
609			ASSERT(-1 <= diff && diff <= 1);
610			ASSERT(diff != 0);
611			ASSERT(diff > 0 ? child == 1 : child == 0);
612#endif
613			node = node->avl_child[child];
614		}
615#ifdef DEBUG
616		diff = tree->avl_compar(new_data,
617		    AVL_NODE2DATA(node, tree->avl_offset));
618		ASSERT(-1 <= diff && diff <= 1);
619		ASSERT(diff != 0);
620		ASSERT(diff > 0 ? child == 1 : child == 0);
621#endif
622	}
623	ASSERT(node->avl_child[child] == NULL);
624
625	avl_insert(tree, new_data, AVL_MKINDEX(node, child));
626}
627
628/*
629 * Add a new node to an AVL tree.
630 */
631void
632avl_add(avl_tree_t *tree, void *new_node)
633{
634	avl_index_t where;
635
636	/*
637	 * This is unfortunate.  We want to call panic() here, even for
638	 * non-DEBUG kernels.  In userland, however, we can't depend on anything
639	 * in libc or else the rtld build process gets confused.
640	 * Thankfully, rtld provides us with its own assfail() so we can use
641	 * that here.  We use assfail() directly to get a nice error message
642	 * in the core - much like what panic() does for crashdumps.
643	 */
644	if (avl_find(tree, new_node, &where) != NULL)
645#ifdef _KERNEL
646		panic("avl_find() succeeded inside avl_add()");
647#else
648		(void) assfail("avl_find() succeeded inside avl_add()",
649		    __FILE__, __LINE__);
650#endif
651	avl_insert(tree, new_node, where);
652}
653
654/*
655 * Delete a node from the AVL tree.  Deletion is similar to insertion, but
656 * with 2 complications.
657 *
658 * First, we may be deleting an interior node. Consider the following subtree:
659 *
660 *     d           c            c
661 *    / \         / \          / \
662 *   b   e       b   e        b   e
663 *  / \	        / \          /
664 * a   c       a            a
665 *
666 * When we are deleting node (d), we find and bring up an adjacent valued leaf
667 * node, say (c), to take the interior node's place. In the code this is
668 * handled by temporarily swapping (d) and (c) in the tree and then using
669 * common code to delete (d) from the leaf position.
670 *
671 * Secondly, an interior deletion from a deep tree may require more than one
672 * rotation to fix the balance. This is handled by moving up the tree through
673 * parents and applying rotations as needed. The return value from
674 * avl_rotation() is used to detect when a subtree did not change overall
675 * height due to a rotation.
676 */
677void
678avl_remove(avl_tree_t *tree, void *data)
679{
680	avl_node_t *delete;
681	avl_node_t *parent;
682	avl_node_t *node;
683	avl_node_t tmp;
684	int old_balance;
685	int new_balance;
686	int left;
687	int right;
688	int which_child;
689	size_t off = tree->avl_offset;
690
691	ASSERT(tree);
692
693	delete = AVL_DATA2NODE(data, off);
694
695	/*
696	 * Deletion is easiest with a node that has at most 1 child.
697	 * We swap a node with 2 children with a sequentially valued
698	 * neighbor node. That node will have at most 1 child. Note this
699	 * has no effect on the ordering of the remaining nodes.
700	 *
701	 * As an optimization, we choose the greater neighbor if the tree
702	 * is right heavy, otherwise the left neighbor. This reduces the
703	 * number of rotations needed.
704	 */
705	if (delete->avl_child[0] != NULL && delete->avl_child[1] != NULL) {
706
707		/*
708		 * choose node to swap from whichever side is taller
709		 */
710		old_balance = AVL_XBALANCE(delete);
711		left = avl_balance2child[old_balance + 1];
712		right = 1 - left;
713
714		/*
715		 * get to the previous value'd node
716		 * (down 1 left, as far as possible right)
717		 */
718		for (node = delete->avl_child[left];
719		    node->avl_child[right] != NULL;
720		    node = node->avl_child[right])
721			;
722
723		/*
724		 * create a temp placeholder for 'node'
725		 * move 'node' to delete's spot in the tree
726		 */
727		tmp = *node;
728
729		*node = *delete;
730		if (node->avl_child[left] == node)
731			node->avl_child[left] = &tmp;
732
733		parent = AVL_XPARENT(node);
734		if (parent != NULL)
735			parent->avl_child[AVL_XCHILD(node)] = node;
736		else
737			tree->avl_root = node;
738		AVL_SETPARENT(node->avl_child[left], node);
739		AVL_SETPARENT(node->avl_child[right], node);
740
741		/*
742		 * Put tmp where node used to be (just temporary).
743		 * It always has a parent and at most 1 child.
744		 */
745		delete = &tmp;
746		parent = AVL_XPARENT(delete);
747		parent->avl_child[AVL_XCHILD(delete)] = delete;
748		which_child = (delete->avl_child[1] != 0);
749		if (delete->avl_child[which_child] != NULL)
750			AVL_SETPARENT(delete->avl_child[which_child], delete);
751	}
752
753
754	/*
755	 * Here we know "delete" is at least partially a leaf node. It can
756	 * be easily removed from the tree.
757	 */
758	ASSERT(tree->avl_numnodes > 0);
759	--tree->avl_numnodes;
760	parent = AVL_XPARENT(delete);
761	which_child = AVL_XCHILD(delete);
762	if (delete->avl_child[0] != NULL)
763		node = delete->avl_child[0];
764	else
765		node = delete->avl_child[1];
766
767	/*
768	 * Connect parent directly to node (leaving out delete).
769	 */
770	if (node != NULL) {
771		AVL_SETPARENT(node, parent);
772		AVL_SETCHILD(node, which_child);
773	}
774	if (parent == NULL) {
775		tree->avl_root = node;
776		return;
777	}
778	parent->avl_child[which_child] = node;
779
780
781	/*
782	 * Since the subtree is now shorter, begin adjusting parent balances
783	 * and performing any needed rotations.
784	 */
785	do {
786
787		/*
788		 * Move up the tree and adjust the balance
789		 *
790		 * Capture the parent and which_child values for the next
791		 * iteration before any rotations occur.
792		 */
793		node = parent;
794		old_balance = AVL_XBALANCE(node);
795		new_balance = old_balance - avl_child2balance[which_child];
796		parent = AVL_XPARENT(node);
797		which_child = AVL_XCHILD(node);
798
799		/*
800		 * If a node was in perfect balance but isn't anymore then
801		 * we can stop, since the height didn't change above this point
802		 * due to a deletion.
803		 */
804		if (old_balance == 0) {
805			AVL_SETBALANCE(node, new_balance);
806			break;
807		}
808
809		/*
810		 * If the new balance is zero, we don't need to rotate
811		 * else
812		 * need a rotation to fix the balance.
813		 * If the rotation doesn't change the height
814		 * of the sub-tree we have finished adjusting.
815		 */
816		if (new_balance == 0)
817			AVL_SETBALANCE(node, new_balance);
818		else if (!avl_rotation(tree, node, new_balance))
819			break;
820	} while (parent != NULL);
821}
822
/*
 * Remove "obj" from "tree" and add it back so that it lands at the position
 * matching its current value.  Wrapped in do { } while (0) so the macro
 * expands to a single statement and stays correct under an unbraced
 * if/else.  Note that both arguments are evaluated twice.
 */
#define	AVL_REINSERT(tree, obj)			\
	do {					\
		avl_remove((tree), (obj));	\
		avl_add((tree), (obj));		\
	} while (0)
826
827boolean_t
828avl_update_lt(avl_tree_t *t, void *obj)
829{
830	void *neighbor;
831
832	ASSERT(((neighbor = AVL_NEXT(t, obj)) == NULL) ||
833	    (t->avl_compar(obj, neighbor) <= 0));
834
835	neighbor = AVL_PREV(t, obj);
836	if ((neighbor != NULL) && (t->avl_compar(obj, neighbor) < 0)) {
837		AVL_REINSERT(t, obj);
838		return (B_TRUE);
839	}
840
841	return (B_FALSE);
842}
843
844boolean_t
845avl_update_gt(avl_tree_t *t, void *obj)
846{
847	void *neighbor;
848
849	ASSERT(((neighbor = AVL_PREV(t, obj)) == NULL) ||
850	    (t->avl_compar(obj, neighbor) >= 0));
851
852	neighbor = AVL_NEXT(t, obj);
853	if ((neighbor != NULL) && (t->avl_compar(obj, neighbor) > 0)) {
854		AVL_REINSERT(t, obj);
855		return (B_TRUE);
856	}
857
858	return (B_FALSE);
859}
860
861boolean_t
862avl_update(avl_tree_t *t, void *obj)
863{
864	void *neighbor;
865
866	neighbor = AVL_PREV(t, obj);
867	if ((neighbor != NULL) && (t->avl_compar(obj, neighbor) < 0)) {
868		AVL_REINSERT(t, obj);
869		return (B_TRUE);
870	}
871
872	neighbor = AVL_NEXT(t, obj);
873	if ((neighbor != NULL) && (t->avl_compar(obj, neighbor) > 0)) {
874		AVL_REINSERT(t, obj);
875		return (B_TRUE);
876	}
877
878	return (B_FALSE);
879}
880
881void
882avl_swap(avl_tree_t *tree1, avl_tree_t *tree2)
883{
884	avl_node_t *temp_node;
885	ulong_t temp_numnodes;
886
887	ASSERT3P(tree1->avl_compar, ==, tree2->avl_compar);
888	ASSERT3U(tree1->avl_offset, ==, tree2->avl_offset);
889	ASSERT3U(tree1->avl_size, ==, tree2->avl_size);
890
891	temp_node = tree1->avl_root;
892	temp_numnodes = tree1->avl_numnodes;
893	tree1->avl_root = tree2->avl_root;
894	tree1->avl_numnodes = tree2->avl_numnodes;
895	tree2->avl_root = temp_node;
896	tree2->avl_numnodes = temp_numnodes;
897}
898
899/*
900 * initialize a new AVL tree
901 */
902void
903avl_create(avl_tree_t *tree, int (*compar) (const void *, const void *),
904    size_t size, size_t offset)
905{
906	ASSERT(tree);
907	ASSERT(compar);
908	ASSERT(size > 0);
909	ASSERT(size >= offset + sizeof (avl_node_t));
910#ifdef _LP64
911	ASSERT((offset & 0x7) == 0);
912#endif
913
914	tree->avl_compar = compar;
915	tree->avl_root = NULL;
916	tree->avl_numnodes = 0;
917	tree->avl_size = size;
918	tree->avl_offset = offset;
919}
920
/*
 * Delete a tree.
 *
 * Nothing is freed or modified here: the routine only asserts that the
 * tree has already been emptied, i.e. that its node count is zero and
 * that it has no root.
 */
/* ARGSUSED */
void
avl_destroy(avl_tree_t *tree)
{
	ASSERT(tree);
	ASSERT(tree->avl_numnodes == 0);
	ASSERT(tree->avl_root == NULL);
}
932
933
934/*
935 * Return the number of nodes in an AVL tree.
936 */
937ulong_t
938avl_numnodes(avl_tree_t *tree)
939{
940	ASSERT(tree);
941	return (tree->avl_numnodes);
942}
943
944boolean_t
945avl_is_empty(avl_tree_t *tree)
946{
947	ASSERT(tree);
948	return (tree->avl_numnodes == 0);
949}
950
951#define	CHILDBIT	(1L)
952
953/*
954 * Post-order tree walk used to visit all tree nodes and destroy the tree
955 * in post order. This is used for destroying a tree without paying any cost
956 * for rebalancing it.
957 *
958 * example:
959 *
960 *	void *cookie = NULL;
961 *	my_data_t *node;
962 *
963 *	while ((node = avl_destroy_nodes(tree, &cookie)) != NULL)
964 *		free(node);
965 *	avl_destroy(tree);
966 *
967 * The cookie is really an avl_node_t to the current node's parent and
968 * an indication of which child you looked at last.
969 *
970 * On input, a cookie value of CHILDBIT indicates the tree is done.
971 */
972void *
973avl_destroy_nodes(avl_tree_t *tree, void **cookie)
974{
975	avl_node_t	*node;
976	avl_node_t	*parent;
977	int		child;
978	void		*first;
979	size_t		off = tree->avl_offset;
980
981	/*
982	 * Initial calls go to the first node or it's right descendant.
983	 */
984	if (*cookie == NULL) {
985		first = avl_first(tree);
986
987		/*
988		 * deal with an empty tree
989		 */
990		if (first == NULL) {
991			*cookie = (void *)CHILDBIT;
992			return (NULL);
993		}
994
995		node = AVL_DATA2NODE(first, off);
996		parent = AVL_XPARENT(node);
997		goto check_right_side;
998	}
999
1000	/*
1001	 * If there is no parent to return to we are done.
1002	 */
1003	parent = (avl_node_t *)((uintptr_t)(*cookie) & ~CHILDBIT);
1004	if (parent == NULL) {
1005		if (tree->avl_root != NULL) {
1006			ASSERT(tree->avl_numnodes == 1);
1007			tree->avl_root = NULL;
1008			tree->avl_numnodes = 0;
1009		}
1010		return (NULL);
1011	}
1012
1013	/*
1014	 * Remove the child pointer we just visited from the parent and tree.
1015	 */
1016	child = (uintptr_t)(*cookie) & CHILDBIT;
1017	parent->avl_child[child] = NULL;
1018	ASSERT(tree->avl_numnodes > 1);
1019	--tree->avl_numnodes;
1020
1021	/*
1022	 * If we just did a right child or there isn't one, go up to parent.
1023	 */
1024	if (child == 1 || parent->avl_child[1] == NULL) {
1025		node = parent;
1026		parent = AVL_XPARENT(parent);
1027		goto done;
1028	}
1029
1030	/*
1031	 * Do parent's right child, then leftmost descendent.
1032	 */
1033	node = parent->avl_child[1];
1034	while (node->avl_child[0] != NULL) {
1035		parent = node;
1036		node = node->avl_child[0];
1037	}
1038
1039	/*
1040	 * If here, we moved to a left child. It may have one
1041	 * child on the right (when balance == +1).
1042	 */
1043check_right_side:
1044	if (node->avl_child[1] != NULL) {
1045		ASSERT(AVL_XBALANCE(node) == 1);
1046		parent = node;
1047		node = node->avl_child[1];
1048		ASSERT(node->avl_child[0] == NULL &&
1049		    node->avl_child[1] == NULL);
1050	} else {
1051		ASSERT(AVL_XBALANCE(node) <= 0);
1052	}
1053
1054done:
1055	if (parent == NULL) {
1056		*cookie = (void *)CHILDBIT;
1057		ASSERT(node == tree->avl_root);
1058	} else {
1059		*cookie = (void *)((uintptr_t)parent | AVL_XCHILD(node));
1060	}
1061
1062	return (AVL_NODE2DATA(node, off));
1063}
1064